From ba15707b292d827bdce732e7713b26fae3f75c74 Mon Sep 17 00:00:00 2001
From: Alex Bennee <alex@bennee.com>
Date: Wed, 14 Jul 2010 15:57:06 +0100
Subject: EasyTag 2.1.1

---
 src/charset.c | 824 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 824 insertions(+)
 create mode 100755 src/charset.c

(limited to 'src/charset.c')

diff --git a/src/charset.c b/src/charset.c
new file mode 100755
index 0000000..74ecd9d
--- /dev/null
+++ b/src/charset.c
@@ -0,0 +1,824 @@
+/*
+ * Main part of code, written by:
+ *
+ * Copyright (C) 1999-2001  Håvard Kvålen <havardk@xmms.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ *
+ */
+
+#include <config.h>
+#include <stdlib.h>
+#include <glib.h>
+#include <string.h>
+#include <errno.h>
+#include <glib/gi18n-lib.h>
+
+#ifdef HAVE_LANGINFO_CODESET
+#include <langinfo.h>
+#endif
+
+#include "charset.h"
+#include "setting.h"
+#include "log.h"
+
+#ifdef WIN32
+    #include "win32/win32dep.h"
+#endif
+
+
+/****************
+ * Declarations *
+ ****************/
+
+/*
+ * Table of the character sets offered to the user. Each entry pairs a
+ * translatable title (only marked with N_() here, translated with _() by the
+ * functions below) with a charset name. Charset_Populate_Combobox() appends
+ * the titles in table order, so the row of a combo box entry is also its
+ * index in this table.
+ * CHARSET_TRANS_ARRAY_LEN is the number of active entries.
+ */
+#define CHARSET_TRANS_ARRAY_LEN ( sizeof(charset_trans_array) / sizeof((charset_trans_array)[0]) )
+const CharsetInfo charset_trans_array[] = {
+    {N_("Arabic (IBM-864)"),                  "IBM864"        },
+    {N_("Arabic (ISO-8859-6)"),               "ISO-8859-6"    },
+    {N_("Arabic (Windows-1256)"),             "windows-1256"  },
+    {N_("Baltic (ISO-8859-13)"),              "ISO-8859-13"   },
+    {N_("Baltic (ISO-8859-4)"),               "ISO-8859-4"    },
+    {N_("Baltic (Windows-1257)"),             "windows-1257"  },
+    {N_("Celtic (ISO-8859-14)"),              "ISO-8859-14"   },
+    {N_("Central European (IBM-852)"),        "IBM852"        },
+    {N_("Central European (ISO-8859-2)"),     "ISO-8859-2"    },
+    {N_("Central European (Windows-1250)"),   "windows-1250"  },
+    {N_("Chinese Simplified (GB18030)"),      "gb18030"       },
+    {N_("Chinese Simplified (GB2312)"),       "GB2312"        },
+    {N_("Chinese Traditional (Big5)"),        "Big5"          },
+    {N_("Chinese Traditional (Big5-HKSCS)"),  "Big5-HKSCS"    },
+    {N_("Cyrillic (IBM-855)"),                "IBM855"        },
+    {N_("Cyrillic (ISO-8859-5)"),             "ISO-8859-5"    },
+    {N_("Cyrillic (ISO-IR-111)"),             "ISO-IR-111"    },
+    {N_("Cyrillic (KOI8-R)"),                 "KOI8-R"        },
+    {N_("Cyrillic (Windows-1251)"),           "windows-1251"  },
+    {N_("Cyrillic/Russian (CP-866)"),         "IBM866"        },
+    {N_("Cyrillic/Ukrainian (KOI8-U)"),       "KOI8-U"        },
+    {N_("English (US-ASCII)"),                "us-ascii"      },
+    {N_("Greek (ISO-8859-7)"),                "ISO-8859-7"    },
+    {N_("Greek (Windows-1253)"),              "windows-1253"  },
+    {N_("Hebrew (IBM-862)"),                  "IBM862"        },
+    {N_("Hebrew (Windows-1255)"),             "windows-1255"  },
+    {N_("Japanese (EUC-JP)"),                 "EUC-JP"        },
+    {N_("Japanese (ISO-2022-JP)"),            "ISO-2022-JP"   },
+    {N_("Japanese (Shift_JIS)"),              "Shift_JIS"     },
+    {N_("Korean (EUC-KR)"),                   "EUC-KR"        },
+    {N_("Nordic (ISO-8859-10)"),              "ISO-8859-10"   },
+    {N_("South European (ISO-8859-3)"),       "ISO-8859-3"    },
+    {N_("Thai (TIS-620)"),                    "TIS-620"       },
+    {N_("Turkish (IBM-857)"),                 "IBM857"        },
+    {N_("Turkish (ISO-8859-9)"),              "ISO-8859-9"    },
+    {N_("Turkish (Windows-1254)"),            "windows-1254"  },
+    //{N_("Unicode (UTF-7)"),                   "UTF-7"         },
+    {N_("Unicode (UTF-8)"),                   "UTF-8"         },
+
+    //{N_("Unicode (UTF-16BE)"),                "UTF-16BE"      },
+    //{N_("Unicode (UTF-16LE)"),                "UTF-16LE"      },
+    //{N_("Unicode (UTF-32BE)"),                "UTF-32BE"      },
+    //{N_("Unicode (UTF-32LE)"),                "UTF-32LE"      },
+
+    {N_("Vietnamese (VISCII)"),               "VISCII"        },
+    {N_("Vietnamese (Windows-1258)"),         "windows-1258"  },
+    {N_("Visual Hebrew (ISO-8859-8)"),        "ISO-8859-8"    },
+    {N_("Western (IBM-850)"),                 "IBM850"        },
+    {N_("Western (ISO-8859-1)"),              "ISO-8859-1"    },
+    {N_("Western (ISO-8859-15)"),             "ISO-8859-15"   },
+    {N_("Western (Windows-1252)"),            "windows-1252"  }
+
+    /*
+     * From this point, character sets aren't supported by iconv
+     */
+/*    {N_("Arabic (IBM-864-I)"),                "IBM864i"              },
+    {N_("Arabic (ISO-8859-6-E)"),             "ISO-8859-6-E"         },
+    {N_("Arabic (ISO-8859-6-I)"),             "ISO-8859-6-I"         },
+    {N_("Arabic (MacArabic)"),                "x-mac-arabic"         },
+    {N_("Armenian (ARMSCII-8)"),              "armscii-8"            },
+    {N_("Central European (MacCE)"),          "x-mac-ce"             },
+    {N_("Chinese Simplified (GBK)"),          "x-gbk"                },
+    {N_("Chinese Simplified (HZ)"),           "HZ-GB-2312"           },
+    {N_("Chinese Traditional (EUC-TW)"),      "x-euc-tw"             },
+    {N_("Croatian (MacCroatian)"),            "x-mac-croatian"       },
+    {N_("Cyrillic (MacCyrillic)"),            "x-mac-cyrillic"       },
+    {N_("Cyrillic/Ukrainian (MacUkrainian)"), "x-mac-ukrainian"      },
+    {N_("Farsi (MacFarsi)"),                  "x-mac-farsi"},
+    {N_("Greek (MacGreek)"),                  "x-mac-greek"          },
+    {N_("Gujarati (MacGujarati)"),            "x-mac-gujarati"       },
+    {N_("Gurmukhi (MacGurmukhi)"),            "x-mac-gurmukhi"       },
+    {N_("Hebrew (ISO-8859-8-E)"),             "ISO-8859-8-E"         },
+    {N_("Hebrew (ISO-8859-8-I)"),             "ISO-8859-8-I"         },
+    {N_("Hebrew (MacHebrew)"),                "x-mac-hebrew"         },
+    {N_("Hindi (MacDevanagari)"),             "x-mac-devanagari"     },
+    {N_("Icelandic (MacIcelandic)"),          "x-mac-icelandic"      },
+    {N_("Korean (JOHAB)"),                    "x-johab"              },
+    {N_("Korean (UHC)"),                      "x-windows-949"        },
+    {N_("Romanian (MacRomanian)"),            "x-mac-romanian"       },
+    {N_("Turkish (MacTurkish)"),              "x-mac-turkish"        },
+    {N_("User Defined"),                      "x-user-defined"       },
+    {N_("Vietnamese (TCVN)"),                 "x-viet-tcvn5712"      },
+    {N_("Vietnamese (VPS)"),                  "x-viet-vps"           },
+    {N_("Western (MacRoman)"),                "x-mac-roman"          },
+    // charsets without possibly translatable names
+    {"T61.8bit",                              "T61.8bit"             },
+    {"x-imap4-modified-utf7",                 "x-imap4-modified-utf7"},
+    {"x-u-escaped",                           "x-u-escaped"          },
+    {"windows-936",                           "windows-936"          }
+*/
+};
+
+/* Locale name -> charset name table. It is created and filled by
+ * Charset_Insert_Locales_Init(), read by get_encoding_from_locale() and
+ * freed by Charset_Insert_Locales_Destroy(). */
+static GHashTable *encodings;
+
+
+
+/*************
+ * Functions *
+ *************/
+
+
+/*
+ * Tell whether a converter from the charset 'locale' to UTF-8 can be opened
+ * with g_iconv_open(); the converter is closed again straight away.
+ * (Adapted from gnome-desktop-item.c.)
+ */
+static gboolean check_locale (const char *locale)
+{
+    GIConv converter = g_iconv_open ("UTF-8", locale);
+    gboolean available = (converter != (GIConv)-1);
+
+    if (available)
+        g_iconv_close (converter);
+
+    return available;
+}
+
+/*
+ * Associate every locale name of the NULL-terminated variadic list with the
+ * charset 'enc' in the 'encodings' table. The pointers are inserted as
+ * given: nothing is copied here.
+ * (Adapted from gnome-desktop-item.c.)
+ */
+static void insert_locales (GHashTable *encodings, char *enc, ...)
+{
+    va_list locales;
+    char *name;
+
+    va_start (locales, enc);
+    while ((name = va_arg (locales, char *)) != NULL)
+        g_hash_table_insert (encodings, name, enc);
+    va_end (locales);
+}
+
+/*
+ * Build the file-scope 'encodings' table, which maps a locale name ("C", a
+ * language code, or ll_CC) to the legacy charset associated with it, as in
+ * the conversion table of the desktop standard spec.
+ * get_encoding_from_locale() reads this table. On WIN32 builds several
+ * languages are mapped to Windows code pages instead of ISO/KOI8 charsets.
+ * (Table adapted from gnome-desktop-item.c.)
+ */
+void
+Charset_Insert_Locales_Init (void)
+{
+    encodings = g_hash_table_new (g_str_hash, g_str_equal);
+
+    /* "C" is plain ascii */
+    insert_locales (encodings, "ASCII", "C", NULL);
+#ifdef WIN32
+    insert_locales (encodings, "windows-1256", "ar", NULL); // 2006.12.31 - For testing with Arabic
+#else
+    insert_locales (encodings, "ISO-8859-6", "ar", NULL);
+#endif
+    /* NOTE(review): "by" doesn't look like a language code (Armenian would
+     * be "hy") - to be confirmed before changing the key. */
+    insert_locales (encodings, "ARMSCII-8", "by", NULL);
+    insert_locales (encodings, "BIG5", "zh_TW", NULL);
+    insert_locales (encodings, "CP1251", "be", "bg", NULL);
+    /* Use EUC-CN if a converter exists for it, else fall back to GB2312 */
+    insert_locales (encodings, check_locale ("EUC-CN") ? "EUC-CN" : "GB2312",
+                    "zh_CN", NULL);
+    insert_locales (encodings, "EUC-JP", "ja", NULL);
+    insert_locales (encodings, "EUC-KR", "ko", NULL);
+    /*insert_locales (encodings, "GEORGIAN-ACADEMY", NULL);*/
+    insert_locales (encodings, "GEORGIAN-PS", "ka", NULL);
+    insert_locales (encodings, "ISO-8859-1", "br", "ca", "da", "de", "en", "es", "eu",
+                    "fi", "fr", "gl", "it", "nl", "wa", "no", "pt", "sv", NULL);
+#ifdef WIN32
+    insert_locales (encodings, "windows-1250", "cs", "hr", "hu", "pl", "ro", "sk", "sl", "sq", "sr", NULL);
+#else
+    insert_locales (encodings, "ISO-8859-2", "cs", "hr", "hu", "pl", "ro", "sk", "sl", "sq", "sr", NULL);
+#endif
+    insert_locales (encodings, "ISO-8859-3", "eo", NULL);
+    insert_locales (encodings, "ISO-8859-5", "mk", "sp", NULL);
+#ifdef WIN32
+    insert_locales (encodings, "windows-1253", "el", NULL);
+    insert_locales (encodings, "windows-1254", "tr", NULL);
+#else
+    insert_locales (encodings, "ISO-8859-7", "el", NULL);
+    insert_locales (encodings, "ISO-8859-9", "tr", NULL);
+#endif
+    insert_locales (encodings, "ISO-8859-13", "lt", "lv", "mi", NULL);
+    insert_locales (encodings, "ISO-8859-14", "ga", "cy", NULL);
+    insert_locales (encodings, "ISO-8859-15", "et", NULL);
+#ifdef WIN32
+    insert_locales (encodings, "windows-1251", "ru", NULL);
+#else
+    insert_locales (encodings, "KOI8-R", "ru", NULL);
+#endif
+    insert_locales (encodings, "KOI8-U", "uk", NULL);
+    /* Use TCVN-5712 if a converter exists for it, else fall back to TCVN */
+    insert_locales (encodings, check_locale ("TCVN-5712") ? "TCVN-5712" : "TCVN",
+                    "vi", NULL);
+    insert_locales (encodings, "TIS-620", "th", NULL);
+#ifdef WIN32
+    insert_locales (encodings, "windows-1255", "he", NULL);
+#endif
+    /*insert_locales (encodings, "VISCII", NULL);*/
+}
+
+/*
+ * Free the 'encodings' table. Calling it when there is no table (never
+ * created, or already destroyed) does nothing, and the pointer is reset so
+ * that no freed table can be reached afterwards.
+ */
+void
+Charset_Insert_Locales_Destroy (void)
+{
+    if (encodings == NULL)
+        return;
+
+    g_hash_table_destroy (encodings);
+    encodings = NULL;
+}
+
+/*
+ * Return the legacy (non-UTF-8) charset associated with 'locale', or NULL if
+ * 'locale' is NULL or nothing is known about it.
+ *  - if the locale names a codeset other than UTF-8 ("fr_FR.ISO-8859-15"),
+ *    the text following the '.' is returned;
+ *  - else the locale without ".codeset" and "@modifier" (ll_CC), then its
+ *    first two characters (ll), are looked up in the 'encodings' table.
+ * The result points either into 'locale' or to a value stored in the table :
+ * it must not be freed. (Adapted from gnome-desktop-item.c.)
+ */
+const char *
+get_encoding_from_locale (const char *locale)
+{
+    char lang[3];
+    const char *encoding;
+    gchar *ll_cc;
+
+    if (locale == NULL)
+        return NULL;
+
+    /* if locale includes encoding (that isn't UTF-8), use it. UTF-8 is
+     * recognised whatever its case and with or without the dash, so that
+     * a locale written "en_US.utf8" is handled like "en_US.UTF-8". */
+    encoding = strchr (locale, '.');
+    if (encoding != NULL
+        && g_ascii_strncasecmp (encoding, ".UTF-8", 6) != 0
+        && g_ascii_strncasecmp (encoding, ".UTF8", 5) != 0) {
+        /* NOTE(review): a trailing "@modifier" is returned with the codeset. */
+        return encoding+1;
+    }
+
+    /* first try the entire locale, reduced to ll_CC : the keys of the table
+     * hold neither a codeset nor a modifier, so "zh_TW.UTF-8" has to be
+     * looked up as "zh_TW" */
+    ll_cc = g_strndup (locale, strcspn (locale, ".@"));
+    encoding = g_hash_table_lookup (encodings, ll_cc);
+    g_free (ll_cc);
+    if (encoding != NULL)
+        return encoding;
+
+    /* Try just the language */
+    strncpy (lang, locale, 2);
+    lang[2] = '\0';
+    return g_hash_table_lookup (encodings, lang);
+}
+
+
+/*
+ * Return the locale from LC_ALL if set, else from LC_CTYPE, else from LANG
+ *
+ * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap08.html#tag_08_02
+ *
+ * LANG
+ *     This variable shall determine the locale category for native language,
+ *     local customs, and coded character set in the absence of the LC_ALL and
+ *     other LC_* ( LC_COLLATE , LC_CTYPE , LC_MESSAGES , LC_MONETARY , LC_NUMERIC ,
+ *     LC_TIME ) environment variables. This can be used by applications to
+ *     determine the language to use for error messages and instructions, collating
+ *     sequences, date formats, and so on.
+ * LC_ALL
+ *     This variable shall determine the values for all locale categories. The
+ *     value of the LC_ALL environment variable has precedence over any of the
+ *     other environment variables starting with LC_ ( LC_COLLATE , LC_CTYPE ,
+ *     LC_MESSAGES , LC_MONETARY , LC_NUMERIC , LC_TIME ) and the LANG environment
+ *     variable.
+ * LC_COLLATE
+ *     This variable shall determine the locale category for character collation.
+ *     It determines collation information for regular expressions and sorting,
+ *     including equivalence classes and multi-character collating elements, in
+ *     various utilities and the strcoll() and strxfrm() functions. Additional
+ *     semantics of this variable, if any, are implementation-defined.
+ * LC_CTYPE
+ *     This variable shall determine the locale category for character handling
+ *     functions, such as tolower(), toupper(), and isalpha(). This environment
+ *     variable determines the interpretation of sequences of bytes of text data
+ *     as characters (for example, single as opposed to multi-byte characters),
+ *     the classification of characters (for example, alpha, digit, graph), and
+ *     the behavior of character classes. Additional semantics of this variable,
+ *    if any, are implementation-defined.
+ * LC_MESSAGES
+ *     This variable shall determine the locale category for processing affirmative
+ *     and negative responses and the language and cultural conventions in which
+ *     messages should be written. [XSI] [Option Start]  It also affects the behavior
+ *     of the catopen() function in determining the message catalog. [Option End]
+ *     Additional semantics of this variable, if any, are implementation-defined.
+ *     The language and cultural conventions of diagnostic and informative messages
+ *     whose format is unspecified by IEEE Std 1003.1-2001 should be affected by
+ *     the setting of LC_MESSAGES .
+ * LC_MONETARY
+ *     This variable shall determine the locale category for monetary-related
+ *     numeric formatting information. Additional semantics of this variable, if
+ *     any, are implementation-defined.
+ * LC_NUMERIC
+ *     This variable shall determine the locale category for numeric formatting
+ *     (for example, thousands separator and radix character) information in
+ *     various utilities as well as the formatted I/O operations in printf() and
+ *     scanf() and the string conversion functions in strtod(). Additional semantics
+ *     of this variable, if any, are implementation-defined.
+ * LC_TIME
+ *     This variable shall determine the locale category for date and time formatting
+ *     information. It affects the behavior of the time functions in strftime().
+ *     Additional semantics of this variable, if any, are implementation-defined.
+ *
+ *
+ * The values of locale categories shall be determined by a precedence order; the
+ * first condition met below determines the value:
+ *
+ *    1. If the LC_ALL environment variable is defined and is not null, the value
+ *       of LC_ALL shall be used.
+ *    2. If the LC_* environment variable ( LC_COLLATE , LC_CTYPE , LC_MESSAGES ,
+ *       LC_MONETARY , LC_NUMERIC , LC_TIME ) is defined and is not null, the value
+ *       of the environment variable shall be used to initialize the category that
+ *       corresponds to the environment variable.
+ *    3. If the LANG environment variable is defined and is not null, the value of
+ *       the LANG environment variable shall be used.
+ *    4. If the LANG environment variable is not set or is set to the empty string,
+ *       the implementation-defined default locale shall be used.
+ *
+ */
+/*
+ * get_locale : returns the value of the first environment variable among
+ * LC_ALL, LC_CTYPE and LANG that is set to a non-empty string, or NULL when
+ * none is. The string is the one returned by g_getenv() : don't free it.
+ */
+const gchar *get_locale (void)
+{
+    /* Listed by decreasing precedence */
+    static const gchar *const variables[] = { "LC_ALL", "LC_CTYPE", "LANG" };
+    guint i;
+
+    for (i = 0; i < G_N_ELEMENTS (variables); i++)
+    {
+        const gchar *value = g_getenv (variables[i]);
+
+        /* A variable set to the empty string counts as not set */
+        if (value != NULL && *value != '\0')
+            return value;
+    }
+
+    return NULL;
+}
+
+
+
+
+/*
+ * convert_string : converts 'string' from 'from_codeset' to 'to_codeset'
+ * (don't use with UTF-16 strings : no length is given for the input).
+ *  - display_error : passed unchanged to convert_string_1(), which does the
+ *                    work and whose result is returned as is.
+ */
+gchar *convert_string (const gchar *string, const gchar *from_codeset,
+                       const gchar *to_codeset, const gboolean display_error)
+{
+    // -1 : convert_string_1() gets no explicit length
+    const gssize unknown_length = -1;
+
+    return convert_string_1(string, unknown_length, from_codeset, to_codeset, display_error);
+}
+/*
+ * convert_string_1 : converts 'length' bytes of 'string' (-1 if it is
+ * NUL-terminated) from 'from_codeset' to 'to_codeset'.
+ * The length must be passed, as the string might be Unicode (UTF-16), in
+ * which case zeroes can't be counted to find its end.
+ *  - conversion OK : returns the converted string (new allocated), followed
+ *    by two zero bytes so that UTF-16 output is terminated too.
+ *  - conversion KO : if 'display_error' is TRUE, logs the failure. Then
+ *    returns a copy of the input, unless the target is UTF-8 and the input
+ *    isn't valid UTF-8 (it couldn't be displayed in the UI) : NULL is
+ *    returned in that case.
+ * Returns NULL as well if 'string' or one of the codesets is NULL.
+ */
+gchar *convert_string_1 (const gchar *string, gssize length, const gchar *from_codeset,
+                         const gchar *to_codeset, const gboolean display_error)
+{
+    gchar *output;
+    gchar *input_copy;
+    GError *error = NULL;
+    gsize bytes_written;
+
+    if (!string || !from_codeset || !to_codeset)
+        return NULL;
+
+    output = g_convert(string, length, to_codeset, from_codeset, NULL, &bytes_written, &error);
+
+    if (output != NULL)
+    {
+        // Patch from Alexey Illarionov:
+        //    g_convert returns null-terminated string only with one \0 at the
+        // end. It can cause some garbage at the end of a string for UTF-16.
+        // The second \0 should be set manually.
+        output = g_realloc(output, bytes_written + 2);
+        output[bytes_written] = output[bytes_written + 1] = 0;
+        return output;
+    }
+
+    // Conversion failed. Work on a bounded copy of the input: with an explicit
+    // length, 'string' isn't necessarily NUL-terminated.
+    input_copy = (length < 0) ? g_strdup(string) : g_strndup(string, length);
+
+    if (display_error)
+    {
+        gchar *escaped_str = g_strescape(input_copy, NULL);
+        Log_Print("convert_string(): Failed conversion from charset '%s' to '%s'. "
+                  "String '%s'. Errcode %d (%s).\n",
+                  from_codeset, to_codeset, escaped_str,
+                  error ? error->code : 0, error ? error->message : "");
+        g_free(escaped_str);
+    }
+    g_clear_error(&error);
+
+    // Return the input string without converting it. If the string is
+    // displayed in the UI, it must be in UTF-8!
+    if ( (g_ascii_strcasecmp(to_codeset, "UTF-8") != 0)
+    ||   (g_utf8_validate(input_copy, -1, NULL)) )
+    {
+        return input_copy;
+    }
+
+    g_free(input_copy);
+    return NULL;
+}
+
+
+/*
+ * Conversion with UTF-8 for Ogg Vorbis and FLAC tags (current_charset <===> UTF-8)
+ *
+ * convert_to_utf8 : converts 'string' with g_locale_to_utf8().
+ *  - NULL string   : returns NULL
+ *  - conversion OK : returns the converted string (new allocated)
+ *  - conversion KO : logs the failure, logs whether the input was valid
+ *                    UTF-8, and returns a copy of the unconverted input
+ */
+gchar *convert_to_utf8 (const gchar *string)
+{
+    GError *conv_error = NULL;
+    const gchar *locale_charset;
+    gchar *printable;
+    gchar *converted;
+
+    if (string == NULL)
+        return NULL;
+
+    converted = g_locale_to_utf8(string, -1, NULL, NULL, &conv_error);
+    if (converted != NULL)
+        return converted;
+
+    // From here on, the conversion has failed
+    printable = g_strescape(string, NULL);
+    g_get_charset(&locale_charset);
+    Log_Print("convert_to_utf8(): Failed conversion from charset '%s'. "
+              "String '%s'. Errcode %d (%s).\n",
+              locale_charset, printable, conv_error->code, conv_error->message);
+    g_free(printable);
+
+    if (!g_utf8_validate(string, -1, NULL))
+        Log_Print("convert_to_utf8(): String was INVALID UTF-8.\n");
+    else
+        Log_Print("convert_to_utf8(): String was valid UTF-8.\n");
+
+    g_error_free(conv_error);
+    return g_strdup(string);
+}
+
+/*
+ * convert_from_utf8 : converts 'string' with g_locale_from_utf8().
+ *  - NULL string   : returns NULL
+ *  - conversion OK : returns the converted string (new allocated)
+ *  - conversion KO : logs the failure, logs whether the input was valid
+ *                    UTF-8, and returns a copy of the unconverted input
+ */
+gchar *convert_from_utf8 (const char *string)
+{
+    GError *conv_error = NULL;
+    const gchar *locale_charset;
+    gchar *printable;
+    gchar *converted;
+
+    if (string == NULL)
+        return NULL;
+
+    converted = g_locale_from_utf8(string, -1, NULL, NULL, &conv_error);
+    if (converted != NULL)
+        return converted;
+
+    // From here on, the conversion has failed
+    printable = g_strescape(string, NULL);
+    g_get_charset(&locale_charset);
+    Log_Print("convert_from_utf8(): Failed conversion to charset '%s'. "
+              "String '%s'. Errcode %d (%s).\n",
+              locale_charset, printable, conv_error->code, conv_error->message);
+    g_free(printable);
+
+    if (!g_utf8_validate(string, -1, NULL))
+        Log_Print("convert_from_utf8(): String was INVALID UTF-8.\n");
+    else
+        Log_Print("convert_from_utf8(): String was valid UTF-8.\n");
+
+    g_error_free(conv_error);
+    return g_strdup(string);
+}
+
+
+
+/*
+ * Convert a string from the filename system encoding to UTF-8.
+ *  - string already valid UTF-8 : returns a copy of it
+ *  - conversion OK : returns the UTF-8 string (new allocated), converted from
+ *    the encoding associated to the locale or, failing that, from ISO-8859-1
+ *  - conversion KO : logs a message and returns an 'escaped' string
+ * Returns NULL only for a NULL 'string'. On WIN32,
+ * ET_Win32_Path_Remove_Trailing_Slash() and ET_Win32_Path_Replace_Slashes()
+ * are then applied to the result.
+ */
+gchar *filename_to_display (const gchar *string)
+{
+    gchar *ret = NULL;
+    GError *error = NULL;
+
+    if (!string)
+        return NULL;
+
+    if (g_utf8_validate(string, -1, NULL))
+    {
+        // String already in UTF-8
+        ret = g_strdup(string);
+    }else
+    {
+        // Get encoding associated to the locale without using UTF-8 (ex , if LANG=fr_FR.UTF-8 it will return ISO-8859-1)
+        const gchar *char_encoding = get_encoding_from_locale(get_locale());
+
+        if (char_encoding)
+        {
+            //g_print("> char_encoding: %s\n",char_encoding);
+            ret = g_convert(string, -1, "UTF-8", char_encoding, NULL, NULL, &error);
+        }
+
+        if (!ret)
+        {
+            // Failing that, try ISO-8859-1. The error of the previous attempt
+            // is freed first : overwriting the pointer would leak it.
+            g_clear_error(&error);
+            ret = g_convert(string, -1, "UTF-8", "ISO-8859-1", NULL, NULL, &error);
+        }
+
+        if (!ret)
+        {
+            gchar *escaped_str = g_strescape(string, NULL);
+            Log_Print(_("The filename '%s' couldn't be converted into UTF-8 (%s).\n"),
+                        escaped_str, error && error->message ? error->message : _("Invalid UTF-8"));
+
+            ret = escaped_str;
+        }
+
+        g_clear_error(&error);
+    }
+
+#ifdef WIN32
+    ET_Win32_Path_Remove_Trailing_Slash(ret);
+    ET_Win32_Path_Replace_Slashes(ret);
+#endif
+
+    return ret;
+}
+
+/*
+ * Convert a string from UTF-8 to the filename system encoding.
+ * The following conversions are tried in turn until one succeeds :
+ *  1. to the codeset named in the locale (ex : fr_FR.UTF-8 => UTF-8), used as
+ *     is, with "//TRANSLIT" or with "//IGNORE" depending on which of the
+ *     FILENAME_CHARACTER_SET_* settings is set
+ *  2. to the encoding associated to the locale (get_encoding_from_locale())
+ *  3. to ISO-8859-1
+ *  4. no conversion : plain copy, if the string is valid UTF-8
+ *  - conversion OK : returns the string in filename system encoding (new allocated)
+ *  - conversion KO : logs an error message and returns an 'escaped' string
+ * Returns NULL only for a NULL 'string'.
+ */
+gchar *filename_from_display (const gchar *string)
+{
+    GError *error = NULL;
+    gchar *ret = NULL;
+    const gchar *locale;
+    const gchar *char_encoding = NULL;
+
+    if (!string) return NULL;
+
+    // Get system encoding from LANG if found (ex : fr_FR.UTF-8 => UTF-8)
+    locale = get_locale();
+    if (locale)
+        char_encoding = strchr(locale, '.');
+
+    if (char_encoding)
+    {
+        char_encoding = char_encoding+1; // Skip the '.'
+
+        if (FILENAME_CHARACTER_SET_OTHER)
+        {
+            ret = g_convert(string, -1, char_encoding, "UTF-8", NULL, NULL, &error);
+
+        }else if (FILENAME_CHARACTER_SET_APPROXIMATE)
+        {
+            // iconv_open (3):
+            // When the string "//TRANSLIT" is appended to tocode, transliteration
+            // is activated. This means that when a character cannot be represented
+            // in the target character set, it can be approximated through one or
+            // several similarly looking characters.
+            gchar *enc = g_strconcat(char_encoding, "//TRANSLIT", NULL);
+            ret = g_convert(string, -1, enc, "UTF-8", NULL, NULL, &error);
+            g_free(enc);
+
+        }else if (FILENAME_CHARACTER_SET_DISCARD)
+        {
+            // iconv_open (3):
+            // When the string "//IGNORE" is appended to tocode, characters that
+            // cannot be represented in the target character set will be silently
+            // discarded.
+            gchar *enc = g_strconcat(char_encoding, "//IGNORE", NULL);
+            ret = g_convert(string, -1, enc, "UTF-8", NULL, NULL, &error);
+            g_free(enc);
+        }
+    }
+
+    // Before each new attempt below, the error of the previous one is freed :
+    // overwriting the pointer would leak it.
+    if (!ret)
+    {
+        // Get system encoding from locale in LANG if found (ex : fr_FR.UTF-8 => fr_FR => ISO-8859-1)
+        char_encoding = get_encoding_from_locale(locale);
+        if (char_encoding)
+        {
+            //g_print("> char_encoding: %s\n",char_encoding);
+            g_clear_error(&error);
+            ret = g_convert(string, -1, char_encoding, "UTF-8", NULL, NULL, &error);
+        }
+    }
+
+    if (!ret)
+    {
+        // Failing that, try ISO-8859-1
+        g_clear_error(&error);
+        ret = g_convert(string, -1, "ISO-8859-1", "UTF-8", NULL, NULL, &error);
+    }
+
+    if (!ret && g_utf8_validate(string, -1, NULL))
+    {
+        // String already in UTF-8
+        ret = g_strdup(string);
+    }
+
+    if (!ret)
+    {
+        // Conversion KO!
+        gchar *escaped_str = g_strescape(string, NULL);
+        Log_Print(_("The UTF-8 string '%s' couldn't be converted into filename encoding (%s)\n"),
+                    escaped_str, error && error->message ? error->message : _("Invalid UTF-8"));
+
+        ret = escaped_str;
+    }
+
+    // An error may remain from a failed attempt, even if a later one succeeded
+    g_clear_error(&error);
+
+    return ret;
+}
+
+
+
+/*
+ * Function used when reading tags : we check if the string is valid UTF-8 (else
+ *   it may cause problem in EasyTAG)
+ *
+ * Examples :
+ *   - some Ogg Vorbis tags contain ISO-8859-1 characters instead of UTF-8.
+ *   - some Flac tags may be probably encoded to ISO-8859-15 (by using for example
+ *     "metaflac --no-utf8-convert ...") so we convert it from ISO-8859-1 to UTF-8.
+ *
+ * If not valid UTF-8, we try some conversion to try to get the correct string
+ *  - string already valid UTF-8 : returns a copy of it
+ *  - conversion OK : returns the UTF-8 string (new allocated), converted from
+ *    the encoding associated to the locale or, failing that, from ISO-8859-1
+ *  - conversion KO : logs a message and returns an 'escaped' string
+ * Returns NULL only for a NULL 'string'.
+ */
+gchar *Try_To_Validate_Utf8_String (const gchar *string)
+{
+    gchar *ret = NULL;
+    GError *error = NULL;
+
+    if (!string)
+        return NULL;
+
+    if (g_utf8_validate(string, -1, NULL))
+    {
+        // String already in UTF-8
+        ret = g_strdup(string);
+    }else
+    {
+        // Get encoding associated to the locale without using UTF-8 (ex , if LANG=fr_FR.UTF-8 it will return ISO-8859-1)
+        const gchar *char_encoding = get_encoding_from_locale(get_locale());
+
+        if (char_encoding)
+        {
+            //g_print("> char_encoding: %s\n",char_encoding);
+            ret = g_convert(string, -1, "UTF-8", char_encoding, NULL, NULL, &error);
+        }
+
+        if (!ret)
+        {
+            // Failing that, try ISO-8859-1. The error of the previous attempt
+            // is freed first : overwriting the pointer would leak it.
+            g_clear_error(&error);
+            ret = g_convert(string, -1, "UTF-8", "ISO-8859-1", NULL, NULL, &error);
+        }
+
+        if (!ret)
+        {
+            gchar *escaped_str = g_strescape(string, NULL);
+            Log_Print(_("The string '%s' couldn't be converted into UTF-8 (%s).\n"),
+                        escaped_str, error && error->message ? error->message : _("Invalid UTF-8"));
+
+            ret = escaped_str;
+        }
+
+        g_clear_error(&error);
+    }
+
+    return ret;
+}
+
+
+
+/*
+ * Append the translated title of every entry of charset_trans_array to
+ * 'combo', in table order, and make active the row whose charset name is
+ * exactly 'select_charset' (nothing is activated for NULL or no match).
+ */
+void Charset_Populate_Combobox (GtkComboBox *combo, gchar *select_charset)
+{
+    const CharsetInfo *entry = charset_trans_array;
+    guint row;
+
+    for (row = 0; row < CHARSET_TRANS_ARRAY_LEN; row++, entry++)
+    {
+        gtk_combo_box_append_text(combo, _(entry->charset_title));
+
+        if (select_charset != NULL && strcmp(entry->charset_name, select_charset) == 0)
+            gtk_combo_box_set_active(combo, row);
+    }
+}
+
+
+/*
+ * Return charset_name from charset_title :
+ * the translated titles of charset_trans_array are compared, ignoring case,
+ * with the translation of 'charset_title'; the charset_name of the first
+ * match is returned, or "" if 'charset_title' is NULL or nothing matches.
+ */
+gchar *Charset_Get_Name_From_Title (const gchar *charset_title)
+{
+    guint idx = 0;
+
+    for (; charset_title != NULL && idx < CHARSET_TRANS_ARRAY_LEN; idx++)
+    {
+        if ( strcasecmp(_(charset_title),_(charset_trans_array[idx].charset_title)) == 0 )
+            return charset_trans_array[idx].charset_name;
+    }
+
+    return "";
+}
+
+
+/*
+ * Return charset_title from charset_name :
+ * 'charset_name' is compared, ignoring case, with the charset names of
+ * charset_trans_array; the translated title of the first match is returned,
+ * or "" if 'charset_name' is NULL or nothing matches.
+ */
+gchar *Charset_Get_Title_From_Name (gchar *charset_name)
+{
+    guint idx;
+
+    if (charset_name == NULL)
+        return "";
+
+    idx = 0;
+    while (idx < CHARSET_TRANS_ARRAY_LEN)
+    {
+        if ( strcasecmp(charset_name,charset_trans_array[idx].charset_name) == 0 )
+            return _(charset_trans_array[idx].charset_title);
+        idx++;
+    }
+
+    return "";
+}
+
+
+
+/*
+ * Test if the conversion is supported between two character sets ('from' and 'to')
+ * (function called in the preferences window).
+ *
+ * The test only opens (and closes) a converter for the pair : nothing is
+ * converted, so no sample string - and no special case for UTF-16 - is needed.
+ * convert_string() isn't suitable for this check : when a conversion fails it
+ * can return a copy of its input rather than NULL, so its result doesn't tell
+ * whether the conversion exists.
+ *
+ * Returns TRUE if the conversion is available, else FALSE (also when 'from' or
+ * 'to' is NULL).
+ */
+gboolean test_conversion_charset (const gchar *from, const gchar *to)
+{
+    GIConv converter;
+
+    if (!from || !to)
+        return FALSE;
+
+    // g_iconv_open() takes the target charset first
+    converter = g_iconv_open(to, from);
+    if (converter == (GIConv)-1)
+        return FALSE;
+
+    g_iconv_close(converter);
+    return TRUE;
+}
-- 
cgit v1.2.3