From 188365cc3ae4cf5e3b0b2001a3558555727e50da Mon Sep 17 00:00:00 2001
From: Joey Zheng
Date: Fri, 31 Aug 2012 15:30:15 +0800
Subject: add Chinese Language support.

---
 junklib.c | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

(limited to 'junklib.c')

diff --git a/junklib.c b/junklib.c
index d31a9435..0d142337 100644
--- a/junklib.c
+++ b/junklib.c
@@ -669,6 +669,24 @@ can_be_russian (const signed char *str) {
     return 0;
 }
 
+static int
+can_be_chinese (const signed char *str) {
+    for (; *str; str++) {
+        if (((unsigned char) *str >= 0x81 && (unsigned char) *str <= 0xFE )
+            && ((unsigned char) *(str+1) >= 0x30 && (unsigned char) *(str+1) <= 0x39)
+            && ((unsigned char) *(str+2) >= 0x81 && (unsigned char) *(str+2) <= 0xFE)
+            && ((unsigned char) *(str+3) >= 0x30 && (unsigned char) *(str+3) <= 0x39)) {
+            return 1;
+        }
+        if (((unsigned char) *str >= 0x81 && (unsigned char) *str <= 0xFE )
+            && (((unsigned char) *(str+1) >= 0x40 && (unsigned char) *(str+1) <= 0x7E)
+                || ((unsigned char) *(str+1) >= 0x80 && (unsigned char) *(str+1) <= 0xFE))) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
 static char *
 convstr_id3v2 (int version, uint8_t encoding, const unsigned char* str, int sz) {
     char out[2048] = "";
@@ -682,8 +700,11 @@ convstr_id3v2 (int version, uint8_t encoding, const unsigned char* str, int sz)
         enc = UTF8_STR;
     }
     else if (encoding == 0) {
+        if (can_be_chinese (str)) {
+            // hack to add cp936 support
+            enc = "cp936";
+        } else if (can_be_russian (str)) {
         // hack to add limited cp1251 recoding support
-        if (can_be_russian (str)) {
             enc = "cp1251";
         }
         else {
@@ -766,7 +787,11 @@ convstr_id3v1 (const char* str, int sz) {
         return str;
     }
     const char *enc = "iso8859-1";
-    if (can_be_russian (str)) {
+    if (can_be_chinese (str)) {
+        // hack to add cp936 support
+        enc = "cp936";
+    } else if (can_be_russian (str)) {
+        // hack to add limited cp1251 recoding support
         enc = "cp1251";
     }
 
@@ -3410,6 +3435,10 @@ junk_detect_charset (const char *s) {
     if (u8_valid (s, strlen (s), NULL)) {
         return NULL; // means no recoding required
     }
+    // hack to add cp936 support
+    if (can_be_chinese (s)) {
+        return "cp936";
+    }
     // check if that could be non-latin1 (too many nonascii chars)
     if (can_be_russian (s)) {
         return "cp1251";
-- 
cgit v1.2.3