diff options
author | Ray Chiang <salagadoola@gmail.com> | 2017-11-15 11:13:36 +0800 |
---|---|---|
committer | Thomas Van Lenten <thomasvl@google.com> | 2017-11-14 22:55:58 -0500 |
commit | 66609a28b667de38a73130e5f6e67c2690eddfc9 (patch) | |
tree | 846b232dc158b64e8c35732ce0660c3489f6dff7 /Foundation | |
parent | f778d3416b6c0c993fccbde8702f57280576f700 (diff) |
HTML unescape using unichar array instead of bytes
Diffstat (limited to 'Foundation')
-rw-r--r-- | Foundation/GTMNSString+HTML.m | 35 | ||||
-rw-r--r-- | Foundation/GTMNSString+HTMLTest.m | 7 |
2 files changed, 31 insertions, 11 deletions
diff --git a/Foundation/GTMNSString+HTML.m b/Foundation/GTMNSString+HTML.m index c35e760..168c094 100644 --- a/Foundation/GTMNSString+HTML.m +++ b/Foundation/GTMNSString+HTML.m @@ -486,29 +486,46 @@ static int EscapeMapCompare(const void *ucharVoid, const void *mapVoid) { NSScanner *scanner = [NSScanner scannerWithString:hexSequence]; unsigned value; if ([scanner scanHexInt:&value] && - value < INT_MAX && value > 0 && [scanner scanLocation] == length - 4) { - value = NSSwapHostIntToLittle(value); - NSString *charString = [[NSString alloc] initWithBytes:&value length:sizeof(value) encoding:NSUTF32LittleEndianStringEncoding]; - if (charString) { + if (value < USHRT_MAX) { + unichar uchar = (unichar)value; + NSString *charString = [NSString stringWithCharacters:&uchar length:1]; [finalString replaceCharactersInRange:escapeRange withString:charString]; + } else if (value >= 0x10000 && value <= 0x10FFFF) { + // code points in unicode supplementary planes + int subtractedValue = value - 0x10000; + unichar uchars[2]; + uchars[0] = 0xD800 + (subtractedValue >> 10); + uchars[1] = 0xDC00 + (subtractedValue & 0x3FF); + NSString *charString = [NSString stringWithCharacters:uchars length:2]; + if (charString) { + [finalString replaceCharactersInRange:escapeRange withString:charString]; + } } } - } else { // Decimal Sequences { NSString *numberSequence = [escapeString substringWithRange:NSMakeRange(2, length - 3)]; NSScanner *scanner = [NSScanner scannerWithString:numberSequence]; int value; if ([scanner scanInt:&value] && - value < INT_MAX && value > 0 && [scanner scanLocation] == length - 3) { - value = NSSwapHostIntToLittle(value); - NSString *charString = [[NSString alloc] initWithBytes:&value length:sizeof(value) encoding:NSUTF32LittleEndianStringEncoding]; - if (charString) { + if (value < USHRT_MAX) { + unichar uchar = (unichar)value; + NSString *charString = [NSString stringWithCharacters:&uchar length:1]; [finalString replaceCharactersInRange:escapeRange 
withString:charString]; + } else if (value >= 0x10000 && value <= 0x10FFFF) { + // code points in unicode supplementary planes + int subtractedValue = value - 0x10000; + unichar uchars[2]; + uchars[0] = 0xD800 + (subtractedValue >> 10); + uchars[1] = 0xDC00 + (subtractedValue & 0x3FF); + NSString *charString = [NSString stringWithCharacters:uchars length:2]; + if (charString) { + [finalString replaceCharactersInRange:escapeRange withString:charString]; + } } } } diff --git a/Foundation/GTMNSString+HTMLTest.m b/Foundation/GTMNSString+HTMLTest.m index f6fb362..7e85eca 100644 --- a/Foundation/GTMNSString+HTMLTest.m +++ b/Foundation/GTMNSString+HTMLTest.m @@ -226,8 +226,11 @@ XCTAssertEqualObjects([@"&lt;this &amp; that&gt;" gtm_stringByUnescapingFromHTML], @"<this & that>", @"HTML unescaping failed"); - XCTAssertEqualObjects([@"👍" gtm_stringByUnescapingFromHTML], - @"👍", @"HTML unescaping failed"); + XCTAssertEqualObjects([@"&#x10437;" gtm_stringByUnescapingFromHTML], + @"𐐷", @"HTML unescaping failed"); + + XCTAssertEqualObjects([@"&#66615;" gtm_stringByUnescapingFromHTML], + @"𐐷", @"HTML unescaping failed"); } // testStringByUnescapingHTML |