summary | refs | log | tree | commit | diff
path: root/utf8.c
diff options
context:
space:
mode:
author: waker <wakeroid@gmail.com> 2011-04-26 22:31:06 +0200
committer: waker <wakeroid@gmail.com> 2011-04-26 22:31:06 +0200
commit: d95b00a4d606b97469fb38c69de66915ae9a0dd0 (patch)
tree: 49257fe2e501f7c4a6dc8c3ed17f90e1c731ea41 /utf8.c
parent: 9d63b3e8d5466c0c9a775eb0d1f159e2aa575f66 (diff)
optimized search
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 61
1 files changed, 45 insertions, 16 deletions
diff --git a/utf8.c b/utf8.c
index 6249f94a..3f703ae8 100644
--- a/utf8.c
+++ b/utf8.c
@@ -605,6 +605,18 @@ static const char upperchars[] = "ÁÉÍÑÓÚÜÄÖÅÆØÀÇÈÊАБВГДЕЁ
#endif
// u8_tolower_slow: table-based lowercase conversion of a single character.
//
// Looks up the `len` bytes at `input` with u8_lc_in_word_set () and, when an
// entry is returned, copies that entry's `lower` string into `out`.
//
// input - bytes of the character to convert
// len   - number of bytes at `input` passed to the lookup
// out   - destination buffer; receives the lowercase bytes followed by a
//         terminating 0, so it needs room for the returned length plus one
//
// Returns the number of bytes written (excluding the terminator), or 0 when
// the lookup returns NULL, in which case `out` is left untouched.
// NOTE(review): u8_lc_in_word_set is defined outside this view; presumably it
// is a generated upper->lower case-mapping lookup -- confirm.
int
+u8_tolower_slow (const char *input, int len, char *out) {
+ struct u8_case_map_t *lc = u8_lc_in_word_set (input, len);
+ if (lc) {
+ int ll = strlen (lc->lower); // size_t result of strlen is narrowed to int here
+ memcpy (out, lc->lower, ll);
+ out[ll] = 0; // keep `out` usable as a C string
+ return ll;
+ }
+ return 0; // no table entry: nothing was written to `out`
+}
+
+int
u8_tolower (const signed char *c, int l, char *out) {
if (*c >= 65 && *c <= 90) {
*out = *c + 0x20;//tolower (*c);
@@ -617,24 +629,10 @@ u8_tolower (const signed char *c, int l, char *out) {
return 1;
}
else {
-#if 1
- struct u8_case_map_t *lc = u8_lc_in_word_set (c, l);
- if (lc) {
- int ll = 2;//strlen (lc->lower);
- memcpy (out, lc->lower, ll);
- out[ll] = 0;
+ int ll = u8_tolower_slow (c, l, out);
+ if (ll) {
return ll;
}
-#else
- for (int i = 0; i < sizeof (upperchars)-l; i++) {
- if (!memcmp (upperchars+i, c, l)) {
- // found!
- memcpy (out, lowerchars+i, l);
- out[l] = 0;
- return l;
- }
- }
-#endif
memcpy (out, c, l);
out[l] = 0;
return l;
@@ -688,6 +686,37 @@ utfcasestr (const char *s1, const char *s2) {
return NULL;
}
+#define min(x,y) ((x)<(y)?(x):(y)) // evaluates its arguments twice; only used with plain variables below
+/* s2 must be lowercase
+ *
+ * utfcasestr_fast: case-insensitive search for s2 inside s1.
+ *
+ * Only the characters of s1 are lowercased (via u8_tolower) before being
+ * compared; the bytes of s2 are compared as they are, which is why s2 has
+ * to be lowercase already.
+ *
+ * s1 - 0-terminated string to search in
+ * s2 - 0-terminated, already lowercased string to search for
+ *
+ * Returns NULL when no match is found. On a match the returned pointer is
+ * p1 after it has been advanced over the matched characters, i.e. it points
+ * into s1 just past the match, not at its start.
+ * NOTE(review): this differs from strstr-style results -- confirm callers
+ * only test the result against NULL.
+ *
+ * Empty-string behaviour as written: an empty s1 always yields NULL (the
+ * outer loop never runs); an empty s2 with a non-empty s1 yields s1.
+ */
+const char *
+utfcasestr_fast (const char *s1, const char *s2) {
+ while (*s1) { // try every character position of s1 as a match start
+ const char *p1 = s1;
+ const char *p2 = s2;
+ while (*p2 && *p1) {
+ int32_t i1 = 0;
+ int32_t i2 = 0;
+ char lw1[10]; // receives the lowercased form of the character at p1
+ const char *next; // NOTE(review): never used in this function
+ u8_nextchar (p1, &i1); // i1 is used below as the byte length of the character at p1
+ u8_nextchar (p2, &i2); // i2 is used below as the byte length of the character at p2
+ int l1 = u8_tolower (p1, i1, lw1); /* NOTE(review): p1 is const char *,
+ * while u8_tolower is declared with a const signed char * parameter
+ */
+ if (memcmp (lw1, p2, min(i2,l1))) { /* only the shorter of the two lengths is compared.
+ * NOTE(review): presumably safe for well-formed UTF-8, where the
+ * first byte determines the sequence length -- confirm for invalid input
+ */
+ break;
+ }
+ p1 += i1;
+ p2 += i2;
+ }
+ if (*p2 == 0) { // all of s2 was consumed: match
+ return p1;
+ }
+ int32_t i = 0;
+ u8_nextchar (s1, &i); // advance s1 by one whole character, not one byte
+ s1 += i;
+ }
+ return NULL;
+}
+
int
u8_strcasecmp (const char *a, const char *b) {
const char *p1 = a, *p2 = b;