aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Hoa V. Dinh <dinh.viet.hoa@gmail.com>2014-10-23 10:43:53 -0700
committerGravatar Hoa V. Dinh <dinh.viet.hoa@gmail.com>2014-10-23 10:43:53 -0700
commit543073bacd5b0237adfb565bffcb344957f2f113 (patch)
tree8df269b461cfa91f06f1946da1eef24899a8b824
parentfbf32fa6e0c634bc25b2b84ff1cc262a0ab819b1 (diff)
Work in progress on moving to chardet
-rwxr-xr-xbuild-mac/mailcore2.xcodeproj/project.pbxproj8
-rw-r--r--src/core/basetypes/MCData.cc68
2 files changed, 72 insertions, 4 deletions
diff --git a/build-mac/mailcore2.xcodeproj/project.pbxproj b/build-mac/mailcore2.xcodeproj/project.pbxproj
index 1c025752..8154243b 100755
--- a/build-mac/mailcore2.xcodeproj/project.pbxproj
+++ b/build-mac/mailcore2.xcodeproj/project.pbxproj
@@ -4548,8 +4548,8 @@
"LIBRARY_SEARCH_PATHS[sdk=macosx*]" = "$(OSX_LIBRARY_SEARCH_PATHS)";
MACOSX_DEPLOYMENT_TARGET = 10.8;
ONLY_ACTIVE_ARCH = YES;
- OSX_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan/include\" \"$(SRCROOT)/../Externals/icu4c/include\" \"$(SRCROOT)/../Externals/ctemplate/include\" /usr/include/tidy /usr/include/libxml2";
- OSX_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan/lib\" \"$(SRCROOT)/../Externals/icu4c/lib\" \"$(SRCROOT)/../Externals/ctemplate/lib\"";
+ OSX_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan/include\" \"$(SRCROOT)/../Externals/icu4c/include\" \"$(SRCROOT)/../Externals/ctemplate/include\" \"$(SRCROOT)/../Externals/uchardet/include\" /usr/include/tidy /usr/include/libxml2";
+ OSX_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan/lib\" \"$(SRCROOT)/../Externals/icu4c/lib\" \"$(SRCROOT)/../Externals/uchardet/lib\" \"$(SRCROOT)/../Externals/ctemplate/lib\"";
};
name = Debug;
};
@@ -4595,8 +4595,8 @@
"LIBRARY_SEARCH_PATHS[sdk=iphonesimulator*]" = "$(IOS_LIBRARY_SEARCH_PATHS)";
"LIBRARY_SEARCH_PATHS[sdk=macosx*]" = "$(OSX_LIBRARY_SEARCH_PATHS)";
MACOSX_DEPLOYMENT_TARGET = 10.8;
- OSX_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan/include\" \"$(SRCROOT)/../Externals/icu4c/include\" \"$(SRCROOT)/../Externals/ctemplate/include\" /usr/include/tidy /usr/include/libxml2";
- OSX_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan/lib\" \"$(SRCROOT)/../Externals/icu4c/lib\" \"$(SRCROOT)/../Externals/ctemplate/lib\"";
+ OSX_HEADERS_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan/include\" \"$(SRCROOT)/../Externals/icu4c/include\" \"$(SRCROOT)/../Externals/ctemplate/include\" \"$(SRCROOT)/../Externals/uchardet/include\" /usr/include/tidy /usr/include/libxml2";
+ OSX_LIBRARY_SEARCH_PATHS = "\"$(SRCROOT)/../Externals/libetpan/lib\" \"$(SRCROOT)/../Externals/icu4c/lib\" \"$(SRCROOT)/../Externals/uchardet/lib\" \"$(SRCROOT)/../Externals/ctemplate/lib\"";
};
name = Release;
};
diff --git a/src/core/basetypes/MCData.cc b/src/core/basetypes/MCData.cc
index 60b8ff8f..f9a2a428 100644
--- a/src/core/basetypes/MCData.cc
+++ b/src/core/basetypes/MCData.cc
@@ -1,9 +1,15 @@
#include "MCData.h"
+#define USE_UCHARDET 1
+
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
+#if USE_UCHARDET
+#include <uchardet/uchardet.h>
+#else
#include <unicode/ucsdet.h>
+#endif
#include <libetpan/libetpan.h>
#include <iconv.h>
#if __APPLE__
@@ -189,6 +195,7 @@ static bool isHintCharsetValid(String * hintCharset)
if (knownCharset == NULL) {
knownCharset = new Set();
+#if !USE_UCHARDET
UCharsetDetector * detector;
UEnumeration * iterator;
UErrorCode err = U_ZERO_ERROR;
@@ -205,6 +212,41 @@ static bool isHintCharsetValid(String * hintCharset)
}
uenum_close(iterator);
ucsdet_close(detector);
+#else
+ const char * charset_list[] = { // charset names registered as known hints when built with USE_UCHARDET
+ "Big5",
+ "EUC-JP",
+ "EUC-KR",
+ "x-euc-tw",
+ "gb18030",
+ "ISO-8859-8",
+ "windows-1255",
+ "windows-1252",
+ "Shift_JIS",
+ "UTF-8",
+ "UTF-16",
+ "HZ-GB-2312",
+ "ISO-2022-CN",
+ "ISO-2022-JP",
+ "ISO-2022-KR",
+ "ISO-8859-5", // every entry needs its comma: adjacent string literals would otherwise be concatenated into one name
+ "windows-1251",
+ "KOI8-R",
+ "x-mac-cyrillic",
+ "IBM866",
+ "IBM855",
+ "ISO-8859-7",
+ "windows-1253",
+ "ISO-8859-2",
+ "windows-1250",
+ "TIS-620"
+ };
+ for(unsigned int i = 0 ; i < sizeof(charset_list) / sizeof(charset_list[0]) ; i ++) { // element count is derived from the array itself
+ String * str = String::stringWithUTF8Characters(charset_list[i]);
+ str = str->lowercaseString(); // names are stored in lowercase form
+ knownCharset->addObject(str);
+ }
+#endif
}
pthread_mutex_unlock(&lock);
@@ -309,6 +351,7 @@ String * Data::stringWithDetectedCharset(String * hintCharset, bool isHTML)
String * Data::charsetWithFilteredHTMLWithoutHint(bool filterHTML)
{
+#if !USE_UCHARDET
UCharsetDetector * detector;
const UCharsetMatch * match;
UErrorCode err = U_ZERO_ERROR;
@@ -330,6 +373,9 @@ String * Data::charsetWithFilteredHTMLWithoutHint(bool filterHTML)
ucsdet_close(detector);
return result;
+#else
+#warning need to be implemented
+#endif
}
String * Data::charsetWithFilteredHTML(bool filterHTML, String * hintCharset)
@@ -337,6 +383,7 @@ String * Data::charsetWithFilteredHTML(bool filterHTML, String * hintCharset)
if (hintCharset == NULL)
return charsetWithFilteredHTMLWithoutHint(filterHTML);
+#if !USE_UCHARDET
const UCharsetMatch ** matches;
int32_t matchesCount;
UCharsetDetector * detector;
@@ -400,6 +447,27 @@ String * Data::charsetWithFilteredHTML(bool filterHTML, String * hintCharset)
result = hintCharset;
return result;
+#else
+ String * result; // charset name to return; assigned on every path below. NOTE(review): filterHTML is not consulted in this branch
+ uchardet_t ud = uchardet_new(); // detector handle, released by uchardet_delete() below
+ int r = uchardet_handle_data(ud, bytes(), length()); // feed the whole buffer to the detector in one call
+ if (r == 0) { // a zero return is treated as success
+ uchardet_data_end(ud); // called after the data has been fed and before the result is queried
+ const char * charset = uchardet_get_charset(ud);
+ if (charset[0] == 0) { // empty name (presumably nothing detected -- confirm against uchardet docs): fall back to the hint
+ result = hintCharset;
+ }
+ else {
+ result = String::stringWithUTF8Characters(charset);
+ }
+ }
+ else { // non-zero return from the detector: fall back to the caller's hint
+ result = hintCharset;
+ }
+ uchardet_delete(ud);
+
+ return result;
+#endif
}
Data * Data::dataWithContentsOfFile(String * filename)