summary | refs | log | tree | commit | diff
path: root/junklib.c
diff options
context:
space:
mode:
author: Joey Zheng <joey@jzheng.org> 2012-08-31 15:30:15 +0800
committer: waker <wakeroid@gmail.com> 2012-09-03 18:14:00 +0200
commit: 188365cc3ae4cf5e3b0b2001a3558555727e50da (patch)
tree: 75693e1c7bdf07ab4985dc9298f1ac06424fe5a3 /junklib.c
parent: 9dd965ef0dc8a578ce8969cbe27f3399e0300dd4 (diff)
add Chinese Language support.
Diffstat (limited to 'junklib.c')
-rw-r--r-- junklib.c 33
1 files changed, 31 insertions, 2 deletions
diff --git a/junklib.c b/junklib.c
index d31a9435..0d142337 100644
--- a/junklib.c
+++ b/junklib.c
@@ -669,6 +669,24 @@ can_be_russian (const signed char *str) {
return 0;
}
+static int // returns 1 as soon as one of the two byte patterns below is found in str, otherwise 0
+can_be_chinese (const signed char *str) { // str is walked byte by byte until its NUL terminator
+ for (; *str; str++) { // every byte offset is tried as a possible start of a multi-byte sequence
+ if (((unsigned char) *str >= 0x81 && (unsigned char) *str <= 0xFE ) // pattern 1, four bytes: 0x81..0xFE, 0x30..0x39, 0x81..0xFE, 0x30..0x39
+ && ((unsigned char) *(str+1) >= 0x30 && (unsigned char) *(str+1) <= 0x39) // && short-circuits: str+1 is read only after *str matched a non-zero range
+ && ((unsigned char) *(str+2) >= 0x81 && (unsigned char) *(str+2) <= 0xFE) // likewise str+2 is read only after str+1 matched, so the scan never steps over the terminator
+ && ((unsigned char) *(str+3) >= 0x30 && (unsigned char) *(str+3) <= 0x39)) { // presumably the GB18030 four-byte form -- TODO confirm against the encoding spec
+ return 1;
+ }
+ if (((unsigned char) *str >= 0x81 && (unsigned char) *str <= 0xFE ) // pattern 2, two bytes: lead byte 0x81..0xFE ...
+ && (((unsigned char) *(str+1) >= 0x40 && (unsigned char) *(str+1) <= 0x7E) // ... followed by 0x40..0x7E (this range includes the ASCII letters) ...
+ || ((unsigned char) *(str+1) >= 0x80 && (unsigned char) *(str+1) <= 0xFE))) { // ... or by 0x80..0xFE; NOTE(review): callers test this before can_be_russian, so a high byte followed by a letter or another high byte selects "cp936" -- confirm this is acceptable for cp1251/latin1 tags
+ return 1;
+ }
+ } // end of scan: no offset matched either pattern
+ return 0; // callers then fall through to their other charset checks
+}
+
static char *
convstr_id3v2 (int version, uint8_t encoding, const unsigned char* str, int sz) {
char out[2048] = "";
@@ -682,8 +700,11 @@ convstr_id3v2 (int version, uint8_t encoding, const unsigned char* str, int sz)
enc = UTF8_STR;
}
else if (encoding == 0) {
+ if (can_be_chinese (str)) {
+ // hack to add cp936 support
+ enc = "cp936";
+ } else if (can_be_russian (str)) {
// hack to add limited cp1251 recoding support
- if (can_be_russian (str)) {
enc = "cp1251";
}
else {
@@ -766,7 +787,11 @@ convstr_id3v1 (const char* str, int sz) {
return str;
}
const char *enc = "iso8859-1";
- if (can_be_russian (str)) {
+ if (can_be_chinese (str)) {
+ // hack to add cp936 support
+ enc = "cp936";
+ } else if (can_be_russian (str)) {
+ // hack to add limited cp1251 recoding support
enc = "cp1251";
}
@@ -3410,6 +3435,10 @@ junk_detect_charset (const char *s) {
if (u8_valid (s, strlen (s), NULL)) {
return NULL; // means no recoding required
}
+ // hack to add cp936 support
+ if (can_be_chinese (s)) {
+ return "cp936";
+ }
// check if that could be non-latin1 (too many nonascii chars)
if (can_be_russian (s)) {
return "cp1251";