diff options
author | 2007-05-16 05:46:10 +1000 | |
---|---|---|
committer | 2007-05-16 05:46:10 +1000 | |
commit | 1c6236831c03727b9955b1b07d2f7489f88b10fe (patch) | |
tree | dba6520bf67bb795857ba83e9e51fdf2162cf3ec /env_universal_common.c | |
parent | c1b4fa847f626b3e4fc2b2393dfba1e1c1f446c3 (diff) |
Fix character set conversions in both directions in universal variable code. This fixes encoding issues on NetBSD. This patch was written by Claes Nästén.
darcs-hash:20070515194610-ac50b-9da1b06c8235dd94ae26074f2a42809d5b856743.gz
Diffstat (limited to 'env_universal_common.c')
-rw-r--r-- | env_universal_common.c | 127 |
1 file changed, 78 insertions, 49 deletions
diff --git a/env_universal_common.c b/env_universal_common.c index df1dcb2d..4a9e3abb 100644 --- a/env_universal_common.c +++ b/env_universal_common.c @@ -125,6 +125,58 @@ static int get_names_show_exported; */ static int get_names_show_unexported; +/** + List of names for the UTF-8 character set. + */ +static char *iconv_utf8_names[]= + { + "utf-8", "UTF-8", + "utf8", "UTF8", + 0 + } + ; + +/** + List of wide character names, undefined byte length. + */ +static char *iconv_wide_names_unknown[]= + { + "wchar_t", "WCHAR_T", + "wchar", "WCHAR", + 0 + } + ; + +/** + List of wide character names, 4 bytes long. + */ +static char *iconv_wide_names_4[]= + { + "wchar_t", "WCHAR_T", + "wchar", "WCHAR", + "ucs-4", "UCS-4", + "ucs4", "UCS4", + "utf-32", "UTF-32", + "utf32", "UTF32", + 0 + } + ; + +/** + List of wide character names, 2 bytes long. + */ +static char *iconv_wide_names_2[]= + { + "wchar_t", "WCHAR_T", + "wchar", "WCHAR", + "ucs-2", "UCS-2", + "ucs2", "UCS2", + "utf-16", "UTF-16", + "utf16", "UTF16", + 0 + } + ; + wchar_t *utf2wcs( const char *in ) { @@ -139,53 +191,21 @@ wchar_t *utf2wcs( const char *in ) really the character set used by wchar_t, but it is the best assumption we can make. */ - char *to_name1[]= - { - "wchar_t", "WCHAR_T", - "wchar", "WCHAR", - 0 - } - ; - - char *to_name4[]= - { - "wchar_t", "WCHAR_T", - "wchar", "WCHAR", - "ucs-4", "UCS-4", - "ucs4", "UCS4", - "utf-32", "UTF-32", - "utf32", "UTF32", - 0 - } - ; - - char *to_name2[]= - { - "wchar_t", "WCHAR_T", - "wchar", "WCHAR", - "ucs-2", "UCS-2", - "ucs2", "UCS2", - "utf-16", "UTF-16", - "utf16", "UTF16", - 0 - } - ; - char **to_name=0; switch (sizeof (wchar_t)) { case 2: - to_name = to_name2; + to_name = iconv_wide_names_2; break; case 4: - to_name = to_name4; + to_name = iconv_wide_names_4; break; default: - to_name = to_name1; + to_name = iconv_wide_names_unknown; break; } @@ -193,12 +213,7 @@ wchar_t *utf2wcs( const char *in ) /* The line protocol fish uses is always utf-8. 
*/ - char *from_name[]= - { - "utf-8", "UTF-8", - "utf8", "UTF8", 0 - } - ; + char **from_name = iconv_utf8_names; size_t in_len = strlen( in ); size_t out_len = sizeof( wchar_t )*(in_len+1); @@ -264,17 +279,31 @@ char *wcs2utf( const wchar_t *in ) char *char_in = (char *)in; char *out; - char *from_name[]= - { - "wchar_t", "WCHAR_T", "wchar", "WCHAR", 0 - } - ; + /* + Try to convert to wchar_t. If that is not a valid character set, + try various names for ucs-4. We can't be sure that ucs-4 is + really the character set used by wchar_t, but it is the best + assumption we can make. + */ + char **from_name=0; - char *to_name[]= + switch (sizeof (wchar_t)) { - "utf-8", "UTF-8", "utf8", "UTF8", 0 + + case 2: + from_name = iconv_wide_names_2; + break; + + case 4: + from_name = iconv_wide_names_4; + break; + + default: + from_name = iconv_wide_names_unknown; + break; } - ; + + char **to_name = iconv_utf8_names; size_t in_len = wcslen( in ); size_t out_len = sizeof( char )*( (MAX_UTF8_BYTES*in_len)+1); |