From 6bc16a677e3523794b118f2a4addfa27cbce4e21 Mon Sep 17 00:00:00 2001 From: "Hoa V. Dinh" Date: Fri, 31 Oct 2014 15:24:25 -0700 Subject: Added unit test for charset detection --- unittest/data/charset-detection/input/big5.txt | 1 + unittest/data/charset-detection/input/gb18030.txt | 1 + unittest/data/charset-detection/input/shift_jis.txt | 1 + unittest/data/charset-detection/input/utf-8.txt | 1 + unittest/unittest.mm | 18 ++++++++++++++++++ 5 files changed, 22 insertions(+) create mode 100644 unittest/data/charset-detection/input/big5.txt create mode 100644 unittest/data/charset-detection/input/gb18030.txt create mode 100644 unittest/data/charset-detection/input/shift_jis.txt create mode 100644 unittest/data/charset-detection/input/utf-8.txt (limited to 'unittest') diff --git a/unittest/data/charset-detection/input/big5.txt b/unittest/data/charset-detection/input/big5.txt new file mode 100644 index 00000000..59db9548 --- /dev/null +++ b/unittest/data/charset-detection/input/big5.txt @@ -0,0 +1 @@ +羉砰いゅ羉砰いゅ羉砰いゅ羉砰いゅ羉砰いゅ羉砰いゅ羉砰いゅ羉砰いゅ羉砰いゅ羉砰いゅ羉砰いゅ羉砰いゅ羉砰いゅ羉砰いゅ羉砰いゅ羉砰いゅ羉砰いゅ \ No newline at end of file diff --git a/unittest/data/charset-detection/input/gb18030.txt b/unittest/data/charset-detection/input/gb18030.txt new file mode 100644 index 00000000..962df87d --- /dev/null +++ b/unittest/data/charset-detection/input/gb18030.txt @@ -0,0 +1 @@ +简体中文简体中文简体中文简体中文简体中文简体中文简体中文简体中文简体中文简体中文简体中文 \ No newline at end of file diff --git a/unittest/data/charset-detection/input/shift_jis.txt b/unittest/data/charset-detection/input/shift_jis.txt new file mode 100644 index 00000000..a580281d --- /dev/null +++ b/unittest/data/charset-detection/input/shift_jis.txt @@ -0,0 +1 @@ +擔杮岅擔杮岅擔杮岅擔杮岅擔杮岅擔杮岅擔杮岅擔杮岅擔杮岅擔杮岅擔杮岅擔杮岅擔杮岅擔杮岅擔杮岅擔杮岅擔杮岅擔杮岅擔杮岅 diff --git a/unittest/data/charset-detection/input/utf-8.txt b/unittest/data/charset-detection/input/utf-8.txt new file mode 100644 index 00000000..cd66b082 --- /dev/null +++ b/unittest/data/charset-detection/input/utf-8.txt @@ -0,0 +1 @@ +姹夊瓧婕㈠瓧绲变竴绶ㄧ⒓钀湅纰 
diff --git a/unittest/unittest.mm b/unittest/unittest.mm
index ee543766..b125231f 100644
--- a/unittest/unittest.mm
+++ b/unittest/unittest.mm
@@ -86,6 +86,7 @@
     NSString * _parserPath;
     NSString * _builderOutputPath;
     NSString * _parserOutputPath;
+    NSString * _charsetDetectionPath;
 }
 
 - (void)setUp {
@@ -96,6 +97,7 @@
     _builderOutputPath = [_mainPath stringByAppendingPathComponent:@"builder/output"];
     _parserPath = [_mainPath stringByAppendingPathComponent:@"parser/input"];
     _parserOutputPath = [_mainPath stringByAppendingPathComponent:@"parser/output"];
+    _charsetDetectionPath = [_mainPath stringByAppendingPathComponent:@"charset-detection"];
 }
 
 - (void)tearDown {
@@ -192,4 +194,39 @@
     }
 }
 
+// Runs charset detection over every fixture file found under
+// _charsetDetectionPath. Each fixture is named after the charset it is encoded
+// in (e.g. input/big5.txt), so the file's base name must equal the lowercased
+// name of the detected charset.
+- (void)testCharsetDetection {
+    NSFileManager * fileManager = [NSFileManager defaultManager];
+    NSArray * list = [fileManager subpathsAtPath:_charsetDetectionPath];
+    // Without fixtures the loop below never runs and the test would pass
+    // without having checked anything.
+    XCTAssertTrue([list count] > 0, @"no fixtures found in %@", _charsetDetectionPath);
+    for(NSString * name in list) {
+        // Hidden entries such as .DS_Store are not fixtures.
+        if ([[name lastPathComponent] hasPrefix:@"."]) {
+            continue;
+        }
+        NSString * path = [_charsetDetectionPath stringByAppendingPathComponent:name];
+        BOOL isDirectory = NO;
+        // isDirectory is only meaningful when the entry actually exists.
+        if (![fileManager fileExistsAtPath:path isDirectory:&isDirectory] || isDirectory) {
+            continue;
+        }
+        NSData * data = [NSData dataWithContentsOfFile:path];
+        XCTAssertNotNil(data, @"could not read fixture %@", path);
+        if (data == nil) {
+            // Messaging nil returns a null pointer, which the -> below would
+            // dereference; report the failure above and move on instead.
+            continue;
+        }
+        NSString * charset = MCO_TO_OBJC([data mco_mcData]->charsetWithFilteredHTML(false));
+        charset = [charset lowercaseString];
+        NSString * expected = [[name lastPathComponent] stringByDeletingPathExtension];
+        XCTAssertEqualObjects(expected, charset, @"fixture %@", name);
+    }
+}
+
 @end
-- 
cgit v1.2.3