diff options
author | Hoa V. Dinh <dinh.viet.hoa@gmail.com> | 2014-11-05 16:41:32 -0800 |
---|---|---|
committer | Hoa V. Dinh <dinh.viet.hoa@gmail.com> | 2014-11-05 16:41:32 -0800 |
commit | 7ebf0bb87ab92f096c30082cd863ce249f62d93e (patch) | |
tree | 816a6e008bcb9739d19507d434e02b34d1c6b2e1 /src | |
parent | c1f9fb2de18971f9a0cc494eb651f0a34caa6871 (diff) |
Fixed decoding of UTF-8
Diffstat (limited to 'src')
-rw-r--r-- | src/core/basetypes/ConvertUTF.c | 14 | ||||
-rw-r--r-- | src/core/basetypes/MCString.cc | 5 |
2 files changed, 18 insertions, 1 deletions
```diff
diff --git a/src/core/basetypes/ConvertUTF.c b/src/core/basetypes/ConvertUTF.c
index f5be6e79..2984f529 100644
--- a/src/core/basetypes/ConvertUTF.c
+++ b/src/core/basetypes/ConvertUTF.c
@@ -527,7 +527,19 @@ ConversionResult ConvertUTF8toUTF16 (
         /* Do this check whether lenient or strict */
         if (!isLegalUTF8(source, extraBytesToRead+1)) {
             result = sourceIllegal;
-            break;
+            if (flags == strictConversion) {
+                /* Abort conversion. */
+                break;
+            } else {
+                /*
+                 * Replace the maximal subpart of ill-formed sequence with
+                 * replacement character.
+                 */
+                source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
+                                                                    sourceEnd);
+                *target++ = UNI_REPLACEMENT_CHAR;
+                continue;
+            }
         }
         /*
          * The cases all fall through. See "Note A" below.
diff --git a/src/core/basetypes/MCString.cc b/src/core/basetypes/MCString.cc
index 642fe6a0..cf71218b 100644
--- a/src/core/basetypes/MCString.cc
+++ b/src/core/basetypes/MCString.cc
@@ -1290,6 +1290,11 @@ void String::appendBytes(const char * bytes, unsigned int length, const char * c
         }
         CFRelease(encodingName);
     }
+    if (encoding == kCFStringEncodingUTF8) {
+        appendUTF8CharactersLength(bytes, length);
+        return;
+    }
+
     CFStringRef cfStr = CFStringCreateWithBytes(NULL, (const UInt8 *) bytes, (CFIndex) length, encoding, false);
     if (cfStr != NULL) {
         CFDataRef data = CFStringCreateExternalRepresentation(NULL, cfStr, kCFStringEncodingUTF16LE, '_');
```