From 055ca014541453a3a2e07db6902b16f9406bf530 Mon Sep 17 00:00:00 2001 From: Alexey Yakovenko Date: Fri, 13 Jun 2014 19:11:37 +0200 Subject: improved cuesheet charset detection --- junklib.c | 12 ++++++++---- junklib.h | 3 +++ playlist.c | 27 ++++++++++++++++----------- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/junklib.c b/junklib.c index 05e58fc9..37692c96 100644 --- a/junklib.c +++ b/junklib.c @@ -3639,15 +3639,13 @@ junk_id3v2_read (playItem_t *it, DB_FILE *fp) { } const char * -junk_detect_charset (const char *s) { - size_t len = strlen (s); - +junk_detect_charset_len (const char *s, int len) { // check if that's already utf8 if (u8_valid (s, len, NULL)) { return NULL; // means no recoding required } // try shift-jis - if (can_be_shift_jis (s, len)) { + if (len > 10 && can_be_shift_jis (s, len)) { return "shift-jis"; } // hack to add cp936 support @@ -3662,6 +3660,12 @@ junk_detect_charset (const char *s) { return "cp1252"; } +const char * +junk_detect_charset (const char *s) { + size_t len = strlen (s); + return junk_detect_charset_len (s, len); +} + int junk_recode (const char *in, int inlen, char *out, int outlen, const char *cs) { return junk_iconv (in, inlen, out, outlen, cs, UTF8_STR); diff --git a/junklib.h b/junklib.h index aaec1a56..6e68994f 100644 --- a/junklib.h +++ b/junklib.h @@ -125,4 +125,7 @@ junk_configchanged (void); void junk_enable_cp936_detection (int enable); +const char * +junk_detect_charset_len (const char *s, int len); + #endif // __JUNKLIB_H diff --git a/playlist.c b/playlist.c index 7a3bf5a0..5be2a779 100644 --- a/playlist.c +++ b/playlist.c @@ -834,7 +834,12 @@ pl_cue_skipspaces (const uint8_t *p) { } static void -pl_get_qvalue_from_cue (const uint8_t *p, int sz, char *out) { +pl_get_qvalue_from_cue (const uint8_t *p, int sz, char *out, const char *charset) { + if (!charset) { + strcpy (out, ""); + return; + } + char *str = out; if (*p == 0) { *out = 0; @@ -867,10 +872,7 @@ pl_get_qvalue_from_cue (const uint8_t *p, int sz, char *out) { out++; *out = 0; } - const char *charset = junk_detect_charset (str); - if (!charset) { - return; - } + // recode int l = strlen (str); char recbuf[l*10]; @@ -1033,11 +1035,14 @@ plt_process_cue_track (playlist_t *playlist, const char *fname, const int starts playItem_t * plt_insert_cue_from_buffer (playlist_t *playlist, playItem_t *after, playItem_t *origin, const uint8_t *buffer, int buffersize, int numsamples, int samplerate) { - LOCK; if (buffersize >= 3 && buffer[0] == 0xef && buffer[1] == 0xbb && buffer[2] == 0xbf) { buffer += 3; buffersize -= 3; } + + const char *charset = junk_detect_charset_len (buffer, buffersize); + + LOCK; playItem_t *ins = after; trace ("plt_insert_cue_from_buffer numsamples=%d, samplerate=%d\n", numsamples, samplerate); char albumperformer[256] = ""; @@ -1086,25 +1091,25 @@ plt_insert_cue_from_buffer (playlist_t *playlist, playItem_t *after, playItem_t // trace ("cue line: %s\n", p); if (!strncmp (p, "PERFORMER ", 10)) { if (!track[0]) { - pl_get_qvalue_from_cue (p + 10, sizeof (albumperformer), albumperformer); + pl_get_qvalue_from_cue (p + 10, sizeof (albumperformer), albumperformer, charset); } else { - pl_get_qvalue_from_cue (p + 10, sizeof (performer), performer); + pl_get_qvalue_from_cue (p + 10, sizeof (performer), performer, charset); } trace ("cue: got performer: %s\n", performer); } else if (!strncmp (p, "TITLE ", 6)) { if (str[0] > ' ' && !albumtitle[0]) { - pl_get_qvalue_from_cue (p + 6, sizeof (albumtitle), albumtitle); + pl_get_qvalue_from_cue (p + 6, sizeof (albumtitle), albumtitle, charset); trace ("cue: got albumtitle: %s\n", albumtitle); } else { - pl_get_qvalue_from_cue (p + 6, sizeof (title), title); + pl_get_qvalue_from_cue (p + 6, sizeof (title), title, charset); trace ("cue: got title: %s\n", title); } } else if (!strncmp (p, "REM GENRE ", 10)) { - pl_get_qvalue_from_cue (p + 10, sizeof (genre), genre); + pl_get_qvalue_from_cue (p + 10, sizeof (genre), genre, charset); } else if (!strncmp (p, "REM DATE ", 9)) { pl_get_value_from_cue (p + 9, sizeof (date), date); -- cgit v1.2.3