diff options
author | Hoa V. Dinh <dinh.viet.hoa@gmail.com> | 2014-10-23 16:20:25 -0700 |
---|---|---|
committer | Hoa V. Dinh <dinh.viet.hoa@gmail.com> | 2014-10-23 16:20:25 -0700 |
commit | aa94649aabb8bf5f83155dd09d2f3db367c6225f (patch) | |
tree | 24e6b8c4dca9cf9d082589646c88fa30f3d17629 | |
parent | 543073bacd5b0237adfb565bffcb344957f2f113 (diff) |
Implemented charset detection with uchardet
-rw-r--r-- | src/core/basetypes/MCData.cc | 14 |
1 file changed, 13 insertions, 1 deletion
diff --git a/src/core/basetypes/MCData.cc b/src/core/basetypes/MCData.cc
index f9a2a428..13c5fd61 100644
--- a/src/core/basetypes/MCData.cc
+++ b/src/core/basetypes/MCData.cc
@@ -374,7 +374,19 @@ String * Data::charsetWithFilteredHTMLWithoutHint(bool filterHTML)
     return result;
 #else
-#warning need to be implemented
+    String * result = NULL;
+    uchardet_t ud = uchardet_new();
+    int r = uchardet_handle_data(ud, bytes(), length());
+    if (r == 0) {
+        uchardet_data_end(ud);
+        const char * charset = uchardet_get_charset(ud);
+        if (charset[0] != 0) {
+            result = String::stringWithUTF8Characters(charset);
+        }
+    }
+    uchardet_delete(ud);
+
+    return result;
 #endif
 }
|