about | summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
author: Hoa V. Dinh <dinh.viet.hoa@gmail.com>, 2014-11-05 16:41:32 -0800
committer: Hoa V. Dinh <dinh.viet.hoa@gmail.com>, 2014-11-05 16:41:32 -0800
commit: 7ebf0bb87ab92f096c30082cd863ce249f62d93e (patch)
tree: 816a6e008bcb9739d19507d434e02b34d1c6b2e1 /src
parent: c1f9fb2de18971f9a0cc494eb651f0a34caa6871 (diff)
Fixed decoding of UTF-8
Diffstat (limited to 'src')
-rw-r--r-- src/core/basetypes/ConvertUTF.c 14
-rw-r--r-- src/core/basetypes/MCString.cc 5
2 files changed, 18 insertions, 1 deletions
diff --git a/src/core/basetypes/ConvertUTF.c b/src/core/basetypes/ConvertUTF.c
index f5be6e79..2984f529 100644
--- a/src/core/basetypes/ConvertUTF.c
+++ b/src/core/basetypes/ConvertUTF.c
@@ -527,7 +527,19 @@ ConversionResult ConvertUTF8toUTF16 (
/* Do this check whether lenient or strict */
if (!isLegalUTF8(source, extraBytesToRead+1)) {
result = sourceIllegal;
- break;
+ if (flags == strictConversion) {
+ /* Abort conversion. */
+ break;
+ } else {
+ /*
+ * Replace the maximal subpart of ill-formed sequence with
+ * replacement character.
+ */
+ source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
+ sourceEnd);
+ *target++ = UNI_REPLACEMENT_CHAR;
+ continue;
+ }
}
/*
* The cases all fall through. See "Note A" below.
diff --git a/src/core/basetypes/MCString.cc b/src/core/basetypes/MCString.cc
index 642fe6a0..cf71218b 100644
--- a/src/core/basetypes/MCString.cc
+++ b/src/core/basetypes/MCString.cc
@@ -1290,6 +1290,11 @@ void String::appendBytes(const char * bytes, unsigned int length, const char * c
}
CFRelease(encodingName);
}
+ if (encoding == kCFStringEncodingUTF8) {
+ appendUTF8CharactersLength(bytes, length);
+ return;
+ }
+
CFStringRef cfStr = CFStringCreateWithBytes(NULL, (const UInt8 *) bytes, (CFIndex) length, encoding, false);
if (cfStr != NULL) {
CFDataRef data = CFStringCreateExternalRepresentation(NULL, cfStr, kCFStringEncodingUTF16LE, '_');