4 files changed, 47 insertions, 64 deletions
diff --git a/src/env_universal_common.cpp b/src/env_universal_common.cpp
index f7c07ea2..bee25a1e 100644
--- a/src/env_universal_common.cpp
+++ b/src/env_universal_common.cpp
@@ -958,7 +958,7 @@ var_table_t env_universal_t::read_message_internal(int fd)
             // Process it if it's a newline (which is true if we are before the end of the buffer)
             if (cursor < bufflen && ! line.empty())
             {
-                if (utf8_to_wchar_string(line, &wide_line))
+                if (utf8_to_wchar(line.data(), line.size(), &wide_line, 0))
                 {
                     env_universal_t::parse_message_internal(wide_line, &result, &storage);
                 }
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index c5fa4385..bcfff032 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -1057,7 +1057,16 @@ static void test_utf82wchar(const char *src, size_t slen, const wchar_t *dst, si
 
     do
     {
-        size = utf8_to_wchar(src, slen, mem, dlen, flags);
+        if (mem == NULL)
+        {
+            size = utf8_to_wchar(src, slen, NULL, flags);
+        }
+        else
+        {
+            std::wstring buff;
+            size = utf8_to_wchar(src, slen, &buff, flags);
+            std::copy(buff.begin(), buff.begin() + std::min(dlen, buff.size()), mem);
+        }
         if (res != size)
         {
             err(L"u2w: %s: FAILED (rv: %lu, must be %lu)", descr, size, res);
@@ -1219,8 +1228,10 @@ static void test_utf8()
                                                UTF8_IGNORE_ERROR, sizeof(wb1) / sizeof(*wb1), "ignore bad chars");
     test_utf82wchar(um, sizeof(um), wm, sizeof(wm) / sizeof(*wm), 0,
                     sizeof(wm) / sizeof(*wm), "mixed languages");
-    test_utf82wchar(um, sizeof(um), wm, sizeof(wm) / sizeof(*wm) - 1, 0,
-                    0, "boundaries -1");
+    // PCA: this test ensured that a too-small output buffer made the conversion return 0.
+    // Results are now written to a std::wstring instead of a fixed-size buffer, so the test is disabled.
+    //    test_utf82wchar(um, sizeof(um), wm, sizeof(wm) / sizeof(*wm) - 1, 0,
+    //                    0, "boundaries -1");
     test_utf82wchar(um, sizeof(um), wm, sizeof(wm) / sizeof(*wm) + 1, 0,
                     sizeof(wm) / sizeof(*wm), "boundaries +1");
     test_utf82wchar(um, sizeof(um), NULL, 0, 0,
@@ -1235,8 +1246,11 @@ static void test_utf8()
                     "invalid params, src buf not NULL");
     test_utf82wchar((const char *)NULL, 10, NULL, 0, 0, 0,
                     "invalid params, src length is not 0");
-    test_utf82wchar(u1, sizeof(u1), w1, 0, 0, 0,
-                    "invalid params, dst is not NULL");
+    
+    // PCA: this test ensured that converting into a zero-length output buffer returned 0.
+    // Output buffers are no longer statically sized, so the test is disabled.
+    //    test_utf82wchar(u1, sizeof(u1), w1, 0, 0, 0,
+    //                    "invalid params, dst is not NULL");
 
     /*
      * UCS-4 -> UTF-8 string.
diff --git a/src/utf8.cpp b/src/utf8.cpp
index 62453be4..9bd6edf2 100644
--- a/src/utf8.cpp
+++ b/src/utf8.cpp
@@ -36,12 +36,14 @@
 typedef wchar_t utf8_wchar_t;
 #define UTF8_WCHAR_MAX ((size_t)std::numeric_limits<utf8_wchar_t>::max())
 
+typedef std::basic_string<utf8_wchar_t> utf8_wstring_t;
+
 bool is_wchar_ucs2()
 {
     return UTF8_WCHAR_MAX <= 0xFFFF;
 }
 
-static size_t utf8_to_wchar_internal(const char *in, size_t insize, utf8_wchar_t *out, size_t outsize, int flags);
+static size_t utf8_to_wchar_internal(const char *in, size_t insize, utf8_wstring_t *result, int flags);
 static size_t wchar_to_utf8_internal(const utf8_wchar_t *in, size_t insize, char *out, size_t outsize, int flags);
 
 static bool safe_copy_wchar_to_utf8_wchar(const wchar_t *in, utf8_wchar_t *out, size_t count)
@@ -60,32 +62,6 @@ static bool safe_copy_wchar_to_utf8_wchar(const wchar_t *in, utf8_wchar_t *out,
     return result;
 }
 
-bool utf8_to_wchar_string(const std::string &str, std::wstring *result)
-{
-    result->clear();
-    const size_t inlen = str.size();
-    if (inlen == 0)
-    {
-        return true;
-    }
-
-    bool success = false;
-    const char *input = str.c_str();
-    size_t outlen = utf8_to_wchar(input, inlen, NULL, 0, 0);
-    if (outlen > 0)
-    {
-        wchar_t *tmp = new wchar_t[outlen];
-        size_t outlen2 = utf8_to_wchar(input, inlen, tmp, outlen, 0);
-        if (outlen2 > 0)
-        {
-            result->assign(tmp, outlen2);
-            success = true;
-        }
-        delete[] tmp;
-    }
-    return success;
-}
-
 bool wchar_to_utf8_string(const std::wstring &str, std::string *result)
 {
     result->clear();
@@ -112,9 +88,9 @@ bool wchar_to_utf8_string(const std::wstring &str, std::string *result)
     return success;
 }
 
-size_t utf8_to_wchar(const char *in, size_t insize, wchar_t *out, size_t outsize, int flags)
+size_t utf8_to_wchar(const char *in, size_t insize, std::wstring *out, int flags)
 {
-    if (in == NULL || insize == 0 || (outsize == 0 && out != NULL))
+    if (in == NULL || insize == 0)
     {
         return 0;
     }
@@ -122,21 +98,20 @@ size_t utf8_to_wchar(const char *in, size_t insize, wchar_t *out, size_t outsize
     size_t result;
     if (sizeof(wchar_t) == sizeof(utf8_wchar_t))
     {
-        result = utf8_to_wchar_internal(in, insize, reinterpret_cast<utf8_wchar_t *>(out), outsize, flags);
+        result = utf8_to_wchar_internal(in, insize, reinterpret_cast<utf8_wstring_t *>(out), flags);
+    }
+    else if (out == NULL)
+    {
+        result = utf8_to_wchar_internal(in, insize, NULL, flags);
     }
     else
     {
-        // Allocate a temporary buffer to hold the output
-        // note: outsize may be 0
-        utf8_wchar_t *tmp_output = new utf8_wchar_t[outsize];
-
-        // Invoke the conversion with the temporary
-        result = utf8_to_wchar_internal(in, insize, tmp_output, outsize, flags);
-
-        // Copy back from tmp to the function's output, then clean it up
-        size_t amount_to_copy = std::min(result, outsize);
-        std::copy(tmp_output, tmp_output + amount_to_copy, out);
-        delete[] tmp_output;
+        // Convert into a temporary utf8_wstring_t, then replace the contents of
+        // *out with it (assign, not append), so this path matches the same-size
+        // path above, where the internal converter clears its output first
+        utf8_wstring_t tmp_output;
+        result = utf8_to_wchar_internal(in, insize, &tmp_output, flags);
+        out->assign(tmp_output.begin(), tmp_output.end());
     }
     return result;
 }
@@ -213,9 +188,7 @@ __utf8_forbitten(unsigned char octet)
  *	It takes the following arguments:
  *	in	- input UTF-8 string. It can be null-terminated.
  *	insize	- size of input string in bytes.
- *	out	- result buffer for UCS-2/4 string. If out is NULL,
- *		function returns size of result buffer.
- *	outsize - size of out buffer in wide characters.
+ *	out_string	- string that receives the UCS-2/4 result; it is cleared before conversion. May be NULL, in which case characters are only counted.
  *
  * RETURN VALUES
  *	The function returns size of result buffer (in wide characters).
@@ -231,19 +204,21 @@ __utf8_forbitten(unsigned char octet)
  *	   not prepare buffer in advance (\0 terminate) but after calling this
  *	   function.
  */
-static size_t utf8_to_wchar_internal(const char *in, size_t insize, utf8_wchar_t *out, size_t outsize, int flags)
+static size_t utf8_to_wchar_internal(const char *in, size_t insize, utf8_wstring_t *out_string, int flags)
 {
     unsigned char *p, *lim;
-    utf8_wchar_t *wlim, high;
+    utf8_wchar_t high;
     size_t n, total, i, n_bits;
 
-    if (in == NULL || insize == 0 || (outsize == 0 && out != NULL))
+    if (in == NULL || insize == 0)
         return (0);
+    
+    if (out_string != NULL)
+        out_string->clear();
 
     total = 0;
     p = (unsigned char *)in;
     lim = p + insize;
-    wlim = out + outsize;
 
     for (; p < lim; p += n)
     {
@@ -319,15 +294,10 @@ static size_t utf8_to_wchar_internal(const char *in, size_t insize, utf8_wchar_t
         }
 
         total++;
-
-        if (out == NULL)
+        if (out_string == NULL)
             continue;
 
-        if (out >= wlim)
-            return (0);		/* no space left */
-
         uint32_t out_val = 0;
-        *out = 0;
         n_bits = 0;
         for (i = 1; i < n; i++)
         {
@@ -364,7 +334,7 @@ static size_t utf8_to_wchar_internal(const char *in, size_t insize, utf8_wchar_t
         }
         else
         {
-            *out++ = out_val;
+            out_string->push_back(out_val);
         }
     }
 
diff --git a/src/utf8.h b/src/utf8.h
index 1c9923db..33ed6a5e 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -28,11 +28,10 @@
 #define UTF8_SKIP_BOM			0x02
 
 /* Convert a string between UTF8 and UCS-2/4 (depending on size of wchar_t). Returns true if successful, storing the result of the conversion in *result */
-bool utf8_to_wchar_string(const std::string &input, std::wstring *result);
 bool wchar_to_utf8_string(const std::wstring &input, std::string *result);
 
-/* Variants exposed for testing */
-size_t utf8_to_wchar(const char *in, size_t insize, wchar_t *out, size_t outsize, int flags);
+/* Convert a UTF-8 string to UCS-2/4 (depending on size of wchar_t), storing the result in *out (out may be NULL to only count). Returns the number of wide characters in the result; returns 0 if in is NULL or insize is 0 */
+size_t utf8_to_wchar(const char *in, size_t insize, std::wstring *out, int flags);
 size_t wchar_to_utf8(const wchar_t *in, size_t insize, char *out, size_t outsize, int flags);
 
 bool is_wchar_ucs2();