aboutsummaryrefslogtreecommitdiff
path: root/Foundation
diff options
context:
space:
mode:
author: Ray Chiang <salagadoola@gmail.com> 2017-11-15 11:13:36 +0800
committer: Thomas Van Lenten <thomasvl@google.com> 2017-11-14 22:55:58 -0500
commit 66609a28b667de38a73130e5f6e67c2690eddfc9 (patch)
tree 846b232dc158b64e8c35732ce0660c3489f6dff7 /Foundation
parent f778d3416b6c0c993fccbde8702f57280576f700 (diff)
HTML unescape using unichar array instead of bytes
Diffstat (limited to 'Foundation')
-rw-r--r-- Foundation/GTMNSString+HTML.m 35
-rw-r--r-- Foundation/GTMNSString+HTMLTest.m 7
2 files changed, 31 insertions, 11 deletions
diff --git a/Foundation/GTMNSString+HTML.m b/Foundation/GTMNSString+HTML.m
index c35e760..168c094 100644
--- a/Foundation/GTMNSString+HTML.m
+++ b/Foundation/GTMNSString+HTML.m
@@ -486,29 +486,46 @@ static int EscapeMapCompare(const void *ucharVoid, const void *mapVoid) {
NSScanner *scanner = [NSScanner scannerWithString:hexSequence];
unsigned value;
if ([scanner scanHexInt:&value] &&
- value < INT_MAX &&
value > 0
&& [scanner scanLocation] == length - 4) {
- value = NSSwapHostIntToLittle(value);
- NSString *charString = [[NSString alloc] initWithBytes:&value length:sizeof(value) encoding:NSUTF32LittleEndianStringEncoding];
- if (charString) {
+ if (value < USHRT_MAX) {
+ unichar uchar = (unichar)value;
+ NSString *charString = [NSString stringWithCharacters:&uchar length:1];
[finalString replaceCharactersInRange:escapeRange withString:charString];
+ } else if (value >= 0x10000 && value <= 0x10FFFF) {
+ // code points in unicode supplementary planes
+ int subtractedValue = value - 0x10000;
+ unichar uchars[2];
+ uchars[0] = 0xD800 + (subtractedValue >> 10);
+ uchars[1] = 0xDC00 + (subtractedValue & 0x3FF);
+ NSString *charString = [NSString stringWithCharacters:uchars length:2];
+ if (charString) {
+ [finalString replaceCharactersInRange:escapeRange withString:charString];
+ }
}
}
-
} else {
// Decimal Sequences &#123;
NSString *numberSequence = [escapeString substringWithRange:NSMakeRange(2, length - 3)];
NSScanner *scanner = [NSScanner scannerWithString:numberSequence];
int value;
if ([scanner scanInt:&value] &&
- value < INT_MAX &&
value > 0
&& [scanner scanLocation] == length - 3) {
- value = NSSwapHostIntToLittle(value);
- NSString *charString = [[NSString alloc] initWithBytes:&value length:sizeof(value) encoding:NSUTF32LittleEndianStringEncoding];
- if (charString) {
+ if (value < USHRT_MAX) {
+ unichar uchar = (unichar)value;
+ NSString *charString = [NSString stringWithCharacters:&uchar length:1];
[finalString replaceCharactersInRange:escapeRange withString:charString];
+ } else if (value >= 0x10000 && value <= 0x10FFFF) {
+ // code points in unicode supplementary planes
+ int subtractedValue = value - 0x10000;
+ unichar uchars[2];
+ uchars[0] = 0xD800 + (subtractedValue >> 10);
+ uchars[1] = 0xDC00 + (subtractedValue & 0x3FF);
+ NSString *charString = [NSString stringWithCharacters:uchars length:2];
+ if (charString) {
+ [finalString replaceCharactersInRange:escapeRange withString:charString];
+ }
}
}
}
diff --git a/Foundation/GTMNSString+HTMLTest.m b/Foundation/GTMNSString+HTMLTest.m
index f6fb362..7e85eca 100644
--- a/Foundation/GTMNSString+HTMLTest.m
+++ b/Foundation/GTMNSString+HTMLTest.m
@@ -226,8 +226,11 @@
XCTAssertEqualObjects([@"&lt;this &amp; that&gt;" gtm_stringByUnescapingFromHTML],
@"<this & that>", @"HTML unescaping failed");
- XCTAssertEqualObjects([@"&#128077;" gtm_stringByUnescapingFromHTML],
- @"👍", @"HTML unescaping failed");
+ XCTAssertEqualObjects([@"&#x10437;" gtm_stringByUnescapingFromHTML],
+ @"𐐷", @"HTML unescaping failed");
+
+ XCTAssertEqualObjects([@"&#66615;" gtm_stringByUnescapingFromHTML],
+ @"𐐷", @"HTML unescaping failed");
} // testStringByUnescapingHTML