diff options
author | wm4 <wm4@nowhere> | 2016-12-09 19:48:47 +0100 |
---|---|---|
committer | wm4 <wm4@nowhere> | 2016-12-09 19:48:59 +0100 |
commit | 0eb87e1baff89e7a979df23f31ca918f9d107e44 (patch) | |
tree | 39977001211eeb6f985f5e8e3bae4074011251b9 /misc/charset_conv.c | |
parent | 2aebf3e482a7b9ee7bc4e46e3c72b3a20278f5b4 (diff) |
charset_conv: drop enca and libguess support
Enca is dead. libguess is relatively useless due to not having a
universal detection mode. On the other hand, libuchardet is actively
developed.
Manpage changes are in the following commit.
Diffstat (limited to 'misc/charset_conv.c')
-rw-r--r-- | misc/charset_conv.c | 67 |
1 files changed, 0 insertions, 67 deletions
diff --git a/misc/charset_conv.c b/misc/charset_conv.c index 48e4e9a5ae..53e3a9db69 100644 --- a/misc/charset_conv.c +++ b/misc/charset_conv.c @@ -28,14 +28,6 @@ #include "common/msg.h" -#if HAVE_ENCA -#include <enca.h> -#endif - -#if HAVE_LIBGUESS -#include <libguess.h> -#endif - #if HAVE_UCHARDET #include <uchardet.h> #endif @@ -111,57 +103,6 @@ static const char *ms_bom_guess(bstr buf) return NULL; } -#if HAVE_ENCA -static const char *enca_guess(struct mp_log *log, bstr buf, const char *language) -{ - // Do our own UTF-8 detection, because ENCA seems to get it wrong sometimes - // (suggested by divVerent). Explicitly allow cut-off UTF-8. - if (bstr_validate_utf8(buf) > -8) - return "UTF-8"; - - if (!language || !language[0]) - language = "__"; // neutral language - - const char *detected_cp = NULL; - - EncaAnalyser analyser = enca_analyser_alloc(language); - if (analyser) { - enca_set_termination_strictness(analyser, 0); - EncaEncoding enc = enca_analyse_const(analyser, buf.start, buf.len); - const char *tmp = enca_charset_name(enc.charset, ENCA_NAME_STYLE_ICONV); - if (tmp && enc.charset != ENCA_CS_UNKNOWN) - detected_cp = tmp; - enca_analyser_free(analyser); - } else { - mp_err(log, "ENCA doesn't know language '%s'\n", language); - size_t langcnt; - const char **languages = enca_get_languages(&langcnt); - mp_err(log, "ENCA supported languages:"); - for (int i = 0; i < langcnt; i++) - mp_err(log, " %s", languages[i]); - mp_err(log, "\n"); - free(languages); - } - - return detected_cp; -} -#endif - -#if HAVE_LIBGUESS -static const char *libguess_guess(struct mp_log *log, bstr buf, - const char *language) -{ - if (!language || !language[0] || strcmp(language, "help") == 0) { - mp_err(log, "libguess needs a language: " - "japanese taiwanese chinese korean russian arabic turkish " - "greek hebrew polish baltic\n"); - return NULL; - } - - return libguess_determine_encoding(buf.start, buf.len, language); -} -#endif - #if HAVE_UCHARDET static const char 
*mp_uchardet(void *talloc_ctx, struct mp_log *log, bstr buf) { @@ -232,14 +173,6 @@ const char *mp_charset_guess(void *talloc_ctx, struct mp_log *log, bstr buf, type = bstr0("auto"); } -#if HAVE_ENCA - if (bstrcasecmp0(type, "enca") == 0) - res = enca_guess(log, buf, lang); -#endif -#if HAVE_LIBGUESS - if (bstrcasecmp0(type, "guess") == 0) - res = libguess_guess(log, buf, lang); -#endif #if HAVE_UCHARDET if (bstrcasecmp0(type, "uchardet") == 0) res = mp_uchardet(talloc_ctx, log, buf); |