From d95b00a4d606b97469fb38c69de66915ae9a0dd0 Mon Sep 17 00:00:00 2001
From: waker
Date: Tue, 26 Apr 2011 22:31:06 +0200
Subject: optimized search

---
 utf8.c | 61 +++++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 45 insertions(+), 16 deletions(-)

(limited to 'utf8.c')

diff --git a/utf8.c b/utf8.c
index 6249f94a..3f703ae8 100644
--- a/utf8.c
+++ b/utf8.c
@@ -604,6 +604,18 @@ static const char lowerchars[] = "áéíñóúüäöåæøàçèêабвгдеё
 static const char upperchars[] = "ÁÉÍÑÓÚÜÄÖÅÆØÀÇÈÊАБВГДЕЁЖЗИЙКЛМНОРПСТУФХЦЧШЩЪЫЬЭЮЯ";
 #endif
 
+int
+u8_tolower_slow (const char *input, int len, char *out) {
+    struct u8_case_map_t *lc = u8_lc_in_word_set (input, len);
+    if (lc) {
+        int ll = strlen (lc->lower);
+        memcpy (out, lc->lower, ll);
+        out[ll] = 0;
+        return ll;
+    }
+    return 0;
+}
+
 int
 u8_tolower (const signed char *c, int l, char *out) {
     if (*c >= 65 && *c <= 90) {
@@ -617,24 +629,10 @@ u8_tolower (const signed char *c, int l, char *out) {
         return 1;
     }
     else {
-#if 1
-        struct u8_case_map_t *lc = u8_lc_in_word_set (c, l);
-        if (lc) {
-            int ll = 2;//strlen (lc->lower);
-            memcpy (out, lc->lower, ll);
-            out[ll] = 0;
+        int ll = u8_tolower_slow (c, l, out);
+        if (ll) {
             return ll;
         }
-#else
-        for (int i = 0; i < sizeof (upperchars)-l; i++) {
-            if (!memcmp (upperchars+i, c, l)) {
-                // found!
-                memcpy (out, lowerchars+i, l);
-                out[l] = 0;
-                return l;
-            }
-        }
-#endif
         memcpy (out, c, l);
         out[l] = 0;
         return l;
@@ -688,6 +686,37 @@ utfcasestr (const char *s1, const char *s2) {
     return NULL;
 }
 
+#define min(x,y) ((x)<(y)?(x):(y))
+// s2 must be lowercase
+const char *
+utfcasestr_fast (const char *s1, const char *s2) {
+    while (*s1) {
+        const char *p1 = s1;
+        const char *p2 = s2;
+        while (*p2 && *p1) {
+            int32_t i1 = 0;
+            int32_t i2 = 0;
+            char lw1[10];
+            const char *next;
+            u8_nextchar (p1, &i1);
+            u8_nextchar (p2, &i2);
+            int l1 = u8_tolower (p1, i1, lw1);
+            if (memcmp (lw1, p2, min(i2,l1))) {
+                break;
+            }
+            p1 += i1;
+            p2 += i2;
+        }
+        if (*p2 == 0) {
+            return p1;
+        }
+        int32_t i = 0;
+        u8_nextchar (s1, &i);
+        s1 += i;
+    }
+    return NULL;
+}
+
 int
 u8_strcasecmp (const char *a, const char *b) {
     const char *p1 = a, *p2 = b;
-- 
cgit v1.2.3