summaryrefslogtreecommitdiff
path: root/junklib.c
diff options
context:
space:
mode:
authorGravatar Alexey Yakovenko <waker@users.sourceforge.net>2014-04-30 14:11:34 +0200
committerGravatar Alexey Yakovenko <waker@users.sourceforge.net>2014-04-30 15:16:23 +0200
commitfb78930d5ccf439719bc42e178aa7dc9a806d969 (patch)
tree39b02b0003e8a2df7cb2c45f5b90b4170f664882 /junklib.c
parent03c9118bc14cd4ec10420216d763d07286059b60 (diff)
junklib: added shift-jis detection
Diffstat (limited to 'junklib.c')
-rw-r--r--junklib.c35
1 files changed, 35 insertions, 0 deletions
diff --git a/junklib.c b/junklib.c
index 1c57ba6e..e7f8a910 100644
--- a/junklib.c
+++ b/junklib.c
@@ -710,6 +710,37 @@ can_be_chinese (const uint8_t *str, int sz) {
return 0;
}
+// Heuristic check: returns 1 when str (size bytes) contains a byte pair that
+// looks like a Shift-JIS double-byte character and junk_iconv reports success
+// (>= 0) converting the whole buffer from "shift-jis" to UTF8_STR; 0 otherwise.
+static int
+can_be_shift_jis (const unsigned char *str, int size) {
+    // reject short input first: the VLA below must never get a size <= 0
+    if (size < 2) {
+        return 0;
+    }
+
+    // prefilter: a lead byte (0x81-0x84, 0x87-0x9f, 0xe0-0xef) directly
+    // followed by a byte in 0x40-0xfc must occur somewhere in the buffer
+    int found = 0;
+    for (int i = 0; i + 1 < size && !found; i++) {
+        unsigned char lead = str[i];
+        unsigned char trail = str[i + 1];
+        int lead_ok = (lead >= 0x81 && lead <= 0x84)
+            || (lead >= 0x87 && lead <= 0x9f)
+            || (lead >= 0xe0 && lead <= 0xef);
+        found = lead_ok && trail >= 0x40 && trail <= 0xfc;
+    }
+    if (!found) {
+        return 0;
+    }
+    // scratch buffer, declared only once a conversion is actually attempted
+    // NOTE(review): still a VLA of size*4 bytes on the stack -- confirm callers pass short strings
+    unsigned char out[size * 4];
+    return junk_iconv (str, size, out, sizeof (out), "shift-jis", UTF8_STR) >= 0;
+}
+
+
static char *
convstr_id3v2 (int version, uint8_t encoding, const unsigned char* str, int sz) {
char out[2048] = "";
@@ -3528,6 +3559,10 @@ junk_detect_charset (const char *s) {
if (u8_valid (s, len, NULL)) {
return NULL; // means no recoding required
}
+ // try shift-jis
+ if (can_be_shift_jis (s, len)) {
+ return "shift-jis";
+ }
// hack to add cp936 support
if (can_be_chinese (s, len)) {
return "cp936";