/*
 * Case-insensitive UTF-8 substring search for several languages.
 *
 * Origin: commit e1756130511cafa5127dc18a8d3844ddd63961a0 by Alexey Yakovenko
 * (Sun, 13 Sep 2009 17:06:02 +0200), "added case insensitive search for
 * several languages". The commit appends this code to utf8.c and adds
 * #include "ctype.h" to that file's include list.
 *
 * u8_nextchar is not defined here; it is expected to come from the rest of
 * utf8.c / utf8.h.
 */
#include <ctype.h>
#include <stdint.h>
#include <string.h>

/*
 * Parallel case tables: the character starting at byte offset i in upperchars
 * lowercases to the character starting at byte offset i in lowerchars, so
 * every pair must have the same encoded length. The leading z/Z pair is never
 * consulted, because u8_tolower handles ASCII via tolower().
 */
static const char lowerchars[] = "záéíñóúüäöåæøàçèéêабвгдеёжзийклмнорпстуфхцчшщъыьэюя";
static const char upperchars[] = "ZÁÉÍÑÓÚÜÄÖÅÆØÀÇÈÉÊАБВГДЕЁЖЗИЙКЛМНОРПСТУФХЦЧШЩЪЫЬЭЮЯ";

/* Compile-time guard: the offset-parallel lookup needs equally sized tables. */
typedef char u8_case_tables_same_size[(sizeof (lowerchars) == sizeof (upperchars)) ? 1 : -1];

/* Size of the scratch buffers utfcasestr folds single characters into. */
enum { U8_FOLD_BUFSIZE = 10 };

/* Returns nonzero when byte b is not a UTF-8 continuation byte (10xxxxxx). */
static int
u8_is_char_start (char b) {
    return ((unsigned char)b & 0xC0) != 0x80;
}

/*
 * Lowercases a single UTF-8 encoded character.
 *
 * c   - first byte of the character
 * l   - encoded length of the character in bytes
 * out - receives the lowercased character plus a terminating NUL; must have
 *       room for l+1 bytes (and at least 2 bytes)
 *
 * Returns the number of bytes written, excluding the terminator.
 *
 * A positive (ASCII) first byte goes through tolower() and always yields one
 * byte, whatever l is. Anything else is looked up in upperchars and replaced
 * by the bytes at the same offset in lowerchars; characters that are not in
 * the table are copied unchanged.
 */
int
u8_tolower (const signed char *c, int l, char *out) {
    if (*c > 0) {
        *out = (char)tolower ((unsigned char)*c);
        out[1] = 0;
        return 1;
    }
    if (l <= 0) {
        // nothing to convert; also keeps a negative length away from memcpy
        out[0] = 0;
        return 0;
    }

    const size_t len = (size_t)l;
    const size_t tablelen = sizeof (upperchars) - 1; // without the terminating NUL

    // the explicit length check prevents the unsigned wrap-around that
    // "sizeof (upperchars) - l" suffers from when l exceeds the table size
    if (len <= tablelen) {
        for (size_t i = 0; i + len <= tablelen; i++) {
            // only accept matches that begin and end on character boundaries,
            // so a stray continuation byte cannot match the tail of an entry
            if (!u8_is_char_start (upperchars[i]) || !u8_is_char_start (upperchars[i + len])) {
                continue;
            }
            if (!memcmp (upperchars + i, c, len)) {
                memcpy (out, lowerchars + i, len);
                out[len] = 0;
                return l;
            }
        }
    }

    // not an uppercase character we know about: pass it through
    memcpy (out, c, len);
    out[len] = 0;
    return l;
}

/*
 * Compares one character of each string ignoring case.
 * a/alen and b/blen are the characters and their byte lengths as reported by
 * u8_nextchar. Returns nonzero when they are equal after lowercasing.
 */
static int
u8_char_equal_nocase (const char *a, int32_t alen, const char *b, int32_t blen) {
    if (alen >= U8_FOLD_BUFSIZE || blen >= U8_FOLD_BUFSIZE) {
        // a run this long would overflow the fold buffers; it cannot be in
        // the case tables either, so comparing the raw bytes is equivalent
        return alen == blen && !memcmp (a, b, (size_t)alen);
    }

    char fold_a[U8_FOLD_BUFSIZE];
    char fold_b[U8_FOLD_BUFSIZE];
    u8_tolower ((const signed char *)a, alen, fold_a);
    u8_tolower ((const signed char *)b, blen, fold_b);
    return !strcmp (fold_a, fold_b);
}

/*
 * Case-insensitive search for s2 inside s1 (both NUL-terminated UTF-8).
 *
 * Returns NULL when s2 does not occur in s1. On success the returned pointer
 * is non-NULL and points into s1 just PAST the matched text (not at its
 * start, unlike strstr).
 * NOTE(review): returning the end of the match looks unintentional, but it is
 * kept because callers may rely on it -- confirm before changing.
 *
 * An empty s1 always yields NULL, even when s2 is empty as well; an empty s2
 * with a non-empty s1 yields s1.
 */
const char *
utfcasestr (const char *s1, const char *s2) {
    while (*s1) {
        const char *p1 = s1;
        const char *p2 = s2;
        // try to match all of s2 at the current position of s1
        while (*p2 && *p1) {
            int32_t i1 = 0;
            int32_t i2 = 0;
            u8_nextchar (p1, &i1);
            u8_nextchar (p2, &i2);
            if (!u8_char_equal_nocase (p1, i1, p2, i2)) {
                break;
            }
            p1 += i1;
            p2 += i2;
        }
        if (*p2 == 0) {
            // consumed the whole needle
            return p1;
        }
        // advance the haystack by one whole character
        int32_t i = 0;
        u8_nextchar (s1, &i);
        s1 += i;
    }
    return NULL;
}