about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author: Hoa V. Dinh <dinh.viet.hoa@gmail.com> 2014-10-23 16:20:25 -0700
committer: Hoa V. Dinh <dinh.viet.hoa@gmail.com> 2014-10-23 16:20:25 -0700
commit aa94649aabb8bf5f83155dd09d2f3db367c6225f (patch)
tree 24e6b8c4dca9cf9d082589646c88fa30f3d17629
parent 543073bacd5b0237adfb565bffcb344957f2f113 (diff)
Implemented charset detection with uchardet
-rw-r--r-- src/core/basetypes/MCData.cc 14
1 files changed, 13 insertions, 1 deletions
diff --git a/src/core/basetypes/MCData.cc b/src/core/basetypes/MCData.cc
index f9a2a428..13c5fd61 100644
--- a/src/core/basetypes/MCData.cc
+++ b/src/core/basetypes/MCData.cc
@@ -374,7 +374,19 @@ String * Data::charsetWithFilteredHTMLWithoutHint(bool filterHTML)
return result;
#else
-#warning need to be implemented
+ String * result = NULL;
+ uchardet_t ud = uchardet_new();
+ int r = uchardet_handle_data(ud, bytes(), length());
+ if (r == 0) {
+ uchardet_data_end(ud);
+ const char * charset = uchardet_get_charset(ud);
+ if (charset[0] != 0) {
+ result = String::stringWithUTF8Characters(charset);
+ }
+ }
+ uchardet_delete(ud);
+
+ return result;
#endif
}