diff options
author | Hoa V. Dinh <dinh.viet.hoa@gmail.com> | 2014-11-05 18:24:00 -0800 |
---|---|---|
committer | Hoa V. Dinh <dinh.viet.hoa@gmail.com> | 2014-11-05 18:24:00 -0800 |
commit | c3727496446f2aba63b3a032e551c76eb28db5eb (patch) | |
tree | 7c4e8888bb426fea501ab805afd68321951c3b97 | |
parent | aaf0e4003700b1fcde502410476e859180de0286 (diff) |
Improved charset conversion
-rw-r--r-- | src/core/basetypes/MCData.cc | 20 | ||||
-rw-r--r-- | src/core/basetypes/MCString.cc | 41 |
2 files changed, 28 insertions, 33 deletions
```diff
diff --git a/src/core/basetypes/MCData.cc b/src/core/basetypes/MCData.cc
index bbe7ee41..6f927cf4 100644
--- a/src/core/basetypes/MCData.cc
+++ b/src/core/basetypes/MCData.cc
@@ -315,25 +315,7 @@ String * Data::stringWithDetectedCharset(String * hintCharset, bool isHTML)
 
     // Remove whitespace at the end of the string to fix conversion.
     if (charset->isEqual(MCSTR("iso-2022-jp-2"))) {
-        const char * theBytes;
-        Data * data;
-
-        theBytes = bytes();
-        data = this;
-        if (length() >= 2) {
-            unsigned int idx;
-
-            idx = length();
-            while ((theBytes[idx - 1] == '\n') || (theBytes[idx - 1] == '\r')) {
-                idx --;
-                if (idx == 0)
-                    break;
-            }
-
-            if (idx != length()) {
-                data = Data::dataWithBytes(theBytes, idx);
-            }
-        }
+        Data * data = this;
         result = data->stringWithCharset("iso-2022-jp-2");
         if (result == NULL) {
             result = data->stringWithCharset("iso-2022-jp");
diff --git a/src/core/basetypes/MCString.cc b/src/core/basetypes/MCString.cc
index cf71218b..febb68b8 100644
--- a/src/core/basetypes/MCString.cc
+++ b/src/core/basetypes/MCString.cc
@@ -1295,23 +1295,36 @@ void String::appendBytes(const char * bytes, unsigned int length, const char * c
         return;
     }
 
-    CFStringRef cfStr = CFStringCreateWithBytes(NULL, (const UInt8 *) bytes, (CFIndex) length, encoding, false);
-    if (cfStr != NULL) {
-        CFDataRef data = CFStringCreateExternalRepresentation(NULL, cfStr, kCFStringEncodingUTF16LE, '_');
-        if (data != NULL) {
-            UChar * fixedData = (UChar *) malloc(CFDataGetLength(data));
-            memcpy(fixedData, CFDataGetBytePtr(data), CFDataGetLength(data));
-            unsigned int length = (unsigned int) CFDataGetLength(data) / 2;
-            for(int32_t i = 0 ; i < length ; i ++) {
-                if (fixedData[i] == 0) {
-                    fixedData[i] = ' ';
+    bool converted = false;
+    int conversionCount = 0;
+    while (!converted) {
+        CFStringRef cfStr = CFStringCreateWithBytes(NULL, (const UInt8 *) bytes, (CFIndex) length, encoding, false);
+        if (cfStr != NULL) {
+            converted = true;
+            CFDataRef data = CFStringCreateExternalRepresentation(NULL, cfStr, kCFStringEncodingUTF16LE, '_');
+            if (data != NULL) {
+                UChar * fixedData = (UChar *) malloc(CFDataGetLength(data));
+                memcpy(fixedData, CFDataGetBytePtr(data), CFDataGetLength(data));
+                unsigned int length = (unsigned int) CFDataGetLength(data) / 2;
+                for(int32_t i = 0 ; i < length ; i ++) {
+                    if (fixedData[i] == 0) {
+                        fixedData[i] = ' ';
+                    }
                 }
+                appendCharactersLength(fixedData, length);
+                free(fixedData);
+                CFRelease(data);
+            }
+            CFRelease(cfStr);
+        }
+        else {
+            length --;
+            conversionCount ++;
+            if (conversionCount > 10) {
+                // failed.
+                break;
             }
-            appendCharactersLength(fixedData, length);
-            free(fixedData);
-            CFRelease(data);
         }
-        CFRelease(cfStr);
     }
 #else
     UErrorCode err;
```