summary | refs | log | tree | commit | diff
path: root/utf8.c
diff options
context:
space:
mode:
author: Alexey Yakovenko <wakeroid@gmail.com> 2009-09-13 17:06:02 +0200
committer: Alexey Yakovenko <wakeroid@gmail.com> 2009-09-13 17:06:02 +0200
commit: e1756130511cafa5127dc18a8d3844ddd63961a0 (patch)
tree: 010b0f6ebd726f881548ebd788fd4f82ea56b805 /utf8.c
parent: c96c663271940785d608647736b78434bd1d2020 (diff)
added case insensitive search for several languages
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 72
1 files changed, 72 insertions, 0 deletions
diff --git a/utf8.c b/utf8.c
index 06a9e3bb..6c9ada11 100644
--- a/utf8.c
+++ b/utf8.c
@@ -18,6 +18,7 @@
#include <string.h>
#include <stdarg.h>
#include <alloca.h>
+#include "ctype.h"
#include "utf8.h"
static const uint32_t offsetsFromUTF8[6] = {
@@ -592,3 +593,74 @@ int u8_valid (const char *str,
return 1;
}
// Parallel case-mapping tables, stored as UTF-8 byte strings.
// The character at byte offset i in upperchars maps to the character at the
// same byte offset in lowerchars, so both strings must have an identical byte
// layout (every pair must encode to the same number of bytes).
static const char lowerchars[] = "záéíñóúüäöåæøàçèéêабвгдеёжзийклмнорпстуфхцчшщъыьэюя";
static const char upperchars[] = "ZÁÉÍÑÓÚÜÄÖÅÆØÀÇÈÉÊАБВГДЕЁЖЗИЙКЛМНОРПСТУФХЦЧШЩЪЫЬЭЮЯ";

// Compile-time guard: a negative array size is a compile error, so the build
// fails if the two tables ever stop being the same size.
typedef char u8_case_tables_same_size[(sizeof (lowerchars) == sizeof (upperchars)) ? 1 : -1];

// Returns non-zero if b is a UTF-8 continuation byte (bit pattern 10xxxxxx).
static int
u8_case_is_continuation (char b) {
    return ((unsigned char)b & 0xC0) == 0x80;
}

// Lowercases a single UTF-8 encoded character.
//
//   c   - pointer to the first byte of the character
//   l   - length of the character in bytes
//   out - receives the result followed by a terminating 0; it must have room
//         for l + 1 bytes (2 bytes when the input is ASCII)
//
// Returns the number of bytes written to out, not counting the terminator.
//
// ASCII bytes are converted with tolower(). Anything else is looked up in
// upperchars and replaced by the entry at the same offset in lowerchars.
// Input that is not present in the table (including already-lowercase
// characters and malformed sequences) is copied to out unchanged.
int
u8_tolower (const signed char *c, int l, char *out) {
    if (*c > 0) {
        // plain ASCII: always exactly one byte, whatever l says
        *out = (char)tolower ((unsigned char)*c);
        out[1] = 0;
        return 1;
    }
    if (l <= 0) {
        // nothing to convert; also keeps a negative length away from memcmp
        *out = 0;
        return 0;
    }

    size_t len = (size_t)l;
    // an input longer than the whole table can never match; skipping the scan
    // also prevents the index arithmetic below from running past the table
    if (len < sizeof (upperchars)) {
        for (size_t i = 0; i + len < sizeof (upperchars); i++) {
            // Accept a match only if it starts and ends on a character
            // boundary of the table. Without this, a stray continuation byte
            // (or the tail of one entry plus the head of the next) would match
            // in the middle of an entry and be replaced by unrelated bytes.
            // upperchars[i + len] is always in range: at worst it is the
            // terminating 0, which is not a continuation byte.
            if (u8_case_is_continuation (upperchars[i])
                || u8_case_is_continuation (upperchars[i + len])) {
                continue;
            }
            if (!memcmp (upperchars + i, c, len)) {
                // found: emit the lowercase form stored at the same offset
                memcpy (out, lowerchars + i, len);
                out[len] = 0;
                return l;
            }
        }
    }

    // not an uppercase character known to the table: pass it through as is
    memcpy (out, c, len);
    out[len] = 0;
    return l;
}
+
// Case-insensitive search for s2 inside s1; both are 0-terminated strings
// that are walked one character at a time using u8_nextchar.
//
// At every character position of s1 the two strings are compared character
// by character: each character is lowercased with u8_tolower into a small
// scratch buffer and the lowercased forms are compared with strcmp.
//
// Returns NULL if s2 does not occur in s1 (this includes the case where s1 is
// empty, even if s2 is empty too). On success the result is non-NULL.
//
// NOTE(review): on success this returns the position in s1 just PAST the
// matched text, not the start of the match as strstr() would. Callers are not
// visible from here, so the behaviour is kept; confirm whether any caller
// needs the match start before changing it to s1.
const char *
utfcasestr (const char *s1, const char *s2) {
    while (*s1) {
        const char *p1 = s1;
        const char *p2 = s2;
        while (*p2 && *p1) {
            int32_t i1 = 0;
            int32_t i2 = 0;
            // scratch buffers for one lowercased character plus terminator
            char lw1[10];
            char lw2[10];
            // after these calls i1/i2 are used as the byte lengths of the
            // characters at p1/p2
            u8_nextchar (p1, &i1);
            u8_nextchar (p2, &i2);
            // u8_tolower takes signed char pointers; cast explicitly instead
            // of relying on an implicit pointer-sign conversion
            u8_tolower ((const signed char *)p1, i1, lw1);
            u8_tolower ((const signed char *)p2, i2, lw2);
            if (strcmp (lw1, lw2)) {
                // mismatch at this position
                break;
            }
            p1 += i1;
            p2 += i2;
        }
        if (*p2 == 0) {
            // all of s2 was consumed, so it matched at s1
            return p1;
        }
        // no match here: advance s1 by one character and retry
        int32_t i = 0;
        u8_nextchar (s1, &i);
        s1 += i;
    }
    return NULL;
}