optimizations in search and sorting

author: Alexey Yakovenko <wakeroid@gmail.com> 2010-05-02 12:41:20 +0200
committer: Alexey Yakovenko <wakeroid@gmail.com> 2010-05-02 12:41:20 +0200
commit: 2e53930ee9f06102ee39bf95b21775a563679d31 (patch)
tree: c9b65db0e54b576e91eed4e61dc1aafb2453aa3f
parent: 95c829fb75ed7c4ecb933073f5ed59d2b647c7f4 (diff)
7 files changed, 467 insertions, 6 deletions
diff --git a/main.c b/main.c
index a00910e0..e26ce0bf 100644
--- a/main.c
+++ b/main.c
@@ -428,6 +428,8 @@ sigterm_handler (int sig) {
 
 int
 main (int argc, char *argv[]) {
+    //u8_lc_map_test ();
+    //return -1;
     setlocale (LC_NUMERIC, "C");
     fprintf (stderr, "starting deadbeef " VERSION "\n");
     srand (time (NULL));
diff --git a/playlist.c b/playlist.c
index 133d50d6..c11ec2e7 100644
--- a/playlist.c
+++ b/playlist.c
@@ -26,6 +26,7 @@
 #include <unistd.h>
 #include <assert.h>
 #include <time.h>
+#include <sys/time.h>
 #ifndef __linux__
 #define _POSIX_C_SOURCE
 #endif
@@ -2425,11 +2426,13 @@ pl_format_title (playItem_t *it, int idx, char *s, int size, int id, const char
 }
 
 void
-pl_sort (int iter, int id, const char *format, int ascending) {
+pl_sort_bubble (int iter, int id, const char *format, int ascending) {
     if (id == DB_COLUMN_FILENUMBER) {
         return;
     }
     GLOBAL_LOCK;
+    struct timeval tm1;
+    gettimeofday (&tm1, NULL);
     int sorted = 0;
     int is_duration = 0;
     int is_track = 0;
@@ -2439,8 +2442,9 @@ pl_sort (int iter, int id, const char *format, int ascending) {
     if (format && id == -1 && !strcmp (format, "%n")) {
         is_track = 1;
     }
-
+    int niters = 0;
     do {
+        niters++;
         sorted = 1;
         playItem_t *it;
         for (it = playlist->head[iter]; it; it = it->next[iter]) {
@@ -2498,6 +2502,131 @@ pl_sort (int iter, int id, const char *format, int ascending) {
             }
         }
     } while (!sorted);
+    struct timeval tm2;
+    gettimeofday (&tm2, NULL);
+
+    int ms = (tm2.tv_sec*1000+tm2.tv_usec/1000) - (tm1.tv_sec*1000+tm1.tv_usec/1000);
+    trace ("sort time: %f seconds, %d iterations\n", ms / 1000.f, niters);
+    GLOBAL_UNLOCK;
+}
+
+static int pl_sort_is_duration;
+static int pl_sort_is_track;
+static int pl_sort_ascending;
+static int pl_sort_id;
+static const char *pl_sort_format;
+
+static int
+pl_sort_compare (playItem_t *a, playItem_t *b) {
+    int cmp;
+    // next = b
+    // it = a
+    if (pl_sort_is_duration) {
+        cmp = pl_sort_ascending ? b->_duration - a->_duration : a->_duration - b->_duration;
+    }
+    else if (pl_sort_is_track) {
+        const char *t;
+        t = pl_find_meta (a, "track");
+        int a = t ? atoi (t) : -1;
+        t = pl_find_meta (b, "track");
+        int b = t ? atoi (t) : -1;
+        cmp = pl_sort_ascending ? b - a : a - b;
+    }
+    else {
+        char title1[1024];
+        char title2[1024];
+        pl_format_title (a, -1, title1, sizeof (title1), pl_sort_id, pl_sort_format);
+        pl_format_title (b, -1, title2, sizeof (title2), pl_sort_id, pl_sort_format);
+        cmp = pl_sort_ascending ? strcmp (title2, title1) : strcmp (title1, title2);
+    }
+    return cmp;
+}
+
+/* preform merge sort on the linked list */
+playItem_t *mergesort(playItem_t *head, int iter);
+/* merge the lists.. */
+playItem_t *merge(playItem_t *head_one, playItem_t *head_two, int iter);
+
+/* preform merge sort on the linked list */
+playItem_t *mergesort(playItem_t *head, int iter) {
+    playItem_t *head_one;
+    playItem_t *head_two;
+
+    if((head == NULL) || (head->next[iter] == NULL)) 
+        return head;
+
+    head_one = head;
+    head_two = head->next[iter];
+    while((head_two != NULL) && (head_two->next[iter] != NULL)) {
+        head = head->next[iter];
+        head_two = head->next[iter]->next[iter];
+    }
+    head_two = head->next[iter];
+    head->next[iter] = NULL;
+
+    return merge(mergesort(head_one, iter), mergesort(head_two, iter), iter);
+}
+
+/* merge the lists.. */
+playItem_t *merge(playItem_t *head_one, playItem_t *head_two, int iter) {
+    playItem_t *head_three;
+
+    if(head_one == NULL) 
+        return head_two;
+
+    if(head_two == NULL) 
+        return head_one;
+
+    int cmp = pl_sort_compare (head_one, head_two);
+    if(cmp < 0) {
+        head_three = head_one;
+        head_three->next[iter] = merge(head_one->next[iter], head_two, iter);
+    } else {
+        head_three = head_two;
+        head_three->next[iter] = merge(head_one, head_two->next[iter], iter);
+    }
+
+    return head_three;
+}
+
+void
+pl_sort (int iter, int id, const char *format, int ascending) {
+    if (id == DB_COLUMN_FILENUMBER || !playlist->head[iter] || !playlist->head[iter]->next[iter]) {
+        return;
+    }
+    GLOBAL_LOCK;
+    struct timeval tm1;
+    gettimeofday (&tm1, NULL);
+    pl_sort_ascending = ascending;
+    trace ("ascending: %d\n", ascending);
+    pl_sort_id = id;
+    pl_sort_format = format;
+    if (format && id == -1 && !strcmp (format, "%l")) {
+        pl_sort_is_duration = 1;
+    }
+    else {
+        pl_sort_is_duration = 0;
+    }
+    if (format && id == -1 && !strcmp (format, "%n")) {
+        pl_sort_is_track = 1;
+    }
+    else {
+        pl_sort_is_track = 0;
+    }
+
+    playlist->head[iter] = mergesort (playlist->head[iter], iter);
+    // update `prev` pointers
+    playItem_t *prev = NULL;
+    for (playItem_t *it = playlist->head[iter]; it; it = it->next[iter]) {
+        it->prev[iter] = prev;
+        prev = it;
+    }
+
+    struct timeval tm2;
+    gettimeofday (&tm2, NULL);
+
+    int ms = (tm2.tv_sec*1000+tm2.tv_usec/1000) - (tm1.tv_sec*1000+tm1.tv_usec/1000);
+    trace ("sort time: %f seconds\n", ms / 1000.f);
     GLOBAL_UNLOCK;
 }
 
diff --git a/u8_gperf.sh b/u8_gperf.sh
new file mode 100755
index 00000000..3ecffc3e
--- /dev/null
+++ b/u8_gperf.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+gperf -c -t -H u8_lc_hash -N u8_lc_in_word_set u8_lc_map.txt >u8_lc_map.h
diff --git a/u8_lc_map.h b/u8_lc_map.h
new file mode 100644
index 00000000..c3c5da49
--- /dev/null
+++ b/u8_lc_map.h
@@ -0,0 +1,231 @@
+/* C code produced by gperf version 3.0.4 */
+/* Command-line: gperf -c -t -H u8_lc_hash -N u8_lc_in_word_set u8_lc_map.txt  */
+/* Computed positions: -k'1-2' */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646.  */
+error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
+#endif
+
+#line 1 "u8_lc_map.txt"
+struct u8_case_map_t {
+    const char *name;
+    const char *lower;
+};
+
+#define TOTAL_KEYWORDS 49
+#define MIN_WORD_LENGTH 2
+#define MAX_WORD_LENGTH 2
+#define MIN_HASH_VALUE 0
+#define MAX_HASH_VALUE 67
+/* maximum key range = 68, duplicates = 0 */
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+/*ARGSUSED*/
+static unsigned int
+u8_lc_hash (str, len)
+     register const char *str;
+     register unsigned int len;
+{
+  static unsigned char asso_values[] =
+    {
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 33, 60,
+      68, 68, 28, 23, 18, 13,  8,  3, 62, 68,
+      68, 61, 68, 68, 62, 50, 57, 40, 52, 47,
+      30, 42, 20, 37, 10, 32,  0, 27, 22, 17,
+      12,  7,  2, 61, 56, 51, 46, 41, 36, 31,
+      26, 21, 16, 11,  6,  1, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68,  5, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68,  0, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+      68, 68, 68, 68, 68, 68
+    };
+  return asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]];
+}
+
+#ifdef __GNUC__
+__inline
+#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
+struct u8_case_map_t *
+u8_lc_in_word_set (str, len)
+     register const char *str;
+     register unsigned int len;
+{
+  static struct u8_case_map_t wordlist[] =
+    {
+#line 35 "u8_lc_map.txt"
+      {"\320\234", "м"},
+#line 54 "u8_lc_map.txt"
+      {"\320\257", "я"},
+#line 41 "u8_lc_map.txt"
+      {"\320\242", "т"},
+      {""}, {""},
+#line 12 "u8_lc_map.txt"
+      {"\303\234", "ü"},
+#line 53 "u8_lc_map.txt"
+      {"\320\256", "ю"},
+#line 40 "u8_lc_map.txt"
+      {"\320\241", "с"},
+#line 7 "u8_lc_map.txt"
+      {"\303\211", "é"},
+      {""},
+#line 33 "u8_lc_map.txt"
+      {"\320\232", "к"},
+#line 52 "u8_lc_map.txt"
+      {"\320\255", "э"},
+#line 38 "u8_lc_map.txt"
+      {"\320\240", "р"},
+#line 20 "u8_lc_map.txt"
+      {"\303\210", "è"},
+      {""},
+#line 11 "u8_lc_map.txt"
+      {"\303\232", "ú"},
+#line 51 "u8_lc_map.txt"
+      {"\320\254", "ь"},
+#line 39 "u8_lc_map.txt"
+      {"\320\237", "п"},
+#line 19 "u8_lc_map.txt"
+      {"\303\207", "ç"},
+      {""},
+#line 31 "u8_lc_map.txt"
+      {"\320\230", "и"},
+#line 50 "u8_lc_map.txt"
+      {"\320\253", "ы"},
+#line 37 "u8_lc_map.txt"
+      {"\320\236", "о"},
+#line 16 "u8_lc_map.txt"
+      {"\303\206", "æ"},
+      {""},
+#line 17 "u8_lc_map.txt"
+      {"\303\230", "ø"},
+#line 49 "u8_lc_map.txt"
+      {"\320\252", "ъ"},
+#line 36 "u8_lc_map.txt"
+      {"\320\235", "н"},
+#line 15 "u8_lc_map.txt"
+      {"\303\205", "å"},
+      {""},
+#line 29 "u8_lc_map.txt"
+      {"\320\226", "ж"},
+#line 48 "u8_lc_map.txt"
+      {"\320\251", "щ"},
+#line 34 "u8_lc_map.txt"
+      {"\320\233", "л"},
+#line 13 "u8_lc_map.txt"
+      {"\303\204", "ä"},
+      {""},
+#line 14 "u8_lc_map.txt"
+      {"\303\226", "ö"},
+#line 47 "u8_lc_map.txt"
+      {"\320\250", "ш"},
+#line 32 "u8_lc_map.txt"
+      {"\320\231", "й"},
+#line 18 "u8_lc_map.txt"
+      {"\303\200", "à"},
+      {""},
+#line 25 "u8_lc_map.txt"
+      {"\320\223", "г"},
+#line 46 "u8_lc_map.txt"
+      {"\320\247", "ч"},
+#line 30 "u8_lc_map.txt"
+      {"\320\227", "з"},
+      {""}, {""},
+#line 10 "u8_lc_map.txt"
+      {"\303\223", "ó"},
+#line 45 "u8_lc_map.txt"
+      {"\320\246", "ц"},
+#line 27 "u8_lc_map.txt"
+      {"\320\225", "е"},
+      {""}, {""},
+#line 23 "u8_lc_map.txt"
+      {"\320\221", "б"},
+#line 44 "u8_lc_map.txt"
+      {"\320\245", "х"},
+#line 26 "u8_lc_map.txt"
+      {"\320\224", "д"},
+      {""}, {""},
+#line 9 "u8_lc_map.txt"
+      {"\303\221", "ñ"},
+#line 43 "u8_lc_map.txt"
+      {"\320\244", "ф"},
+#line 24 "u8_lc_map.txt"
+      {"\320\222", "в"},
+      {""}, {""},
+#line 28 "u8_lc_map.txt"
+      {"\320\201", "ё"},
+#line 42 "u8_lc_map.txt"
+      {"\320\243", "у"},
+#line 22 "u8_lc_map.txt"
+      {"\320\220", "а"},
+      {""}, {""},
+#line 6 "u8_lc_map.txt"
+      {"\303\201", "á"},
+#line 8 "u8_lc_map.txt"
+      {"\303\215", "í"},
+#line 21 "u8_lc_map.txt"
+      {"\303\212", "ê"}
+    };
+
+  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+    {
+      register int key = u8_lc_hash (str, len);
+
+      if (key <= MAX_HASH_VALUE && key >= 0)
+        {
+          register const char *s = wordlist[key].name;
+
+          if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
+            return &wordlist[key];
+        }
+    }
+  return 0;
+}
+#line 55 "u8_lc_map.txt"
+
diff --git a/u8_lc_map.txt b/u8_lc_map.txt
new file mode 100644
index 00000000..43b6927f
--- /dev/null
+++ b/u8_lc_map.txt
@@ -0,0 +1,55 @@
+struct u8_case_map_t {
+    const char *name;
+    const char *lower;
+};
+%%
+Á, "á"
+É, "é"
+Í, "í"
+Ñ, "ñ"
+Ó, "ó"
+Ú, "ú"
+Ü, "ü"
+Ä, "ä"
+Ö, "ö"
+Å, "å"
+Æ, "æ"
+Ø, "ø"
+À, "à"
+Ç, "ç"
+È, "è"
+Ê, "ê"
+А, "а"
+Б, "б"
+В, "в"
+Г, "г"
+Д, "д"
+Е, "е"
+Ё, "ё"
+Ж, "ж"
+З, "з"
+И, "и"
+Й, "й"
+К, "к"
+Л, "л"
+М, "м"
+Н, "н"
+О, "о"
+Р, "р"
+П, "п"
+С, "с"
+Т, "т"
+У, "у"
+Ф, "ф"
+Х, "х"
+Ц, "ц"
+Ч, "ч"
+Ш, "ш"
+Щ, "щ"
+Ъ, "ъ"
+Ы, "ы"
+Ь, "ь"
+Э, "э"
+Ю, "ю"
+Я, "я"
+%%
diff --git a/utf8.c b/utf8.c
index b55bc0de..6e6bb100 100644
--- a/utf8.c
+++ b/utf8.c
@@ -26,6 +26,7 @@
 //#include <alloca.h>
 #include "ctype.h"
 #include "utf8.h"
+#include "u8_lc_map.h"
 
 static const uint32_t offsetsFromUTF8[6] = {
     0x00000000UL, 0x00003080UL, 0x000E2080UL,
@@ -599,17 +600,33 @@ int u8_valid (const char  *str,
         return 1;
 }
 
-static const char lowerchars[] = "záéíñóúüäöåæøàçèéêабвгдеёжзийклмнорпстуфхцчшщъыьэюя";
-static const char upperchars[] = "ZÁÉÍÑÓÚÜÄÖÅÆØÀÇÈÉÊАБВГДЕЁЖЗИЙКЛМНОРПСТУФХЦЧШЩЪЫЬЭЮЯ";
+#if 0
+static const char lowerchars[] = "áéíñóúüäöåæøàçèêабвгдеёжзийклмнорпстуфхцчшщъыьэюя";
+static const char upperchars[] = "ÁÉÍÑÓÚÜÄÖÅÆØÀÇÈÊАБВГДЕЁЖЗИЙКЛМНОРПСТУФХЦЧШЩЪЫЬЭЮЯ";
+#endif
 
 int
 u8_tolower (const signed char *c, int l, char *out) {
-    if (*c > 0) {
-        *out = tolower (*c);
+    if (*c >= 65 && *c <= 90) {
+        *out = *c + 0x20;//tolower (*c);
+        out[1] = 0;
+        return 1;
+    }
+    else if (*c > 0) {
+        *out = *c;
         out[1] = 0;
         return 1;
     }
     else {
+#if 1
+        struct u8_case_map_t *lc = u8_lc_in_word_set (c, l);
+        if (lc) {
+            int ll = 2;//strlen (lc->lower);
+            memcpy (out, lc->lower, ll);
+            out[ll] = 0;
+            return ll;
+        }
+#else
         for (int i = 0; i < sizeof (upperchars)-l; i++) {
             if (!memcmp (upperchars+i, c, l)) {
                 // found!
@@ -618,6 +635,7 @@ u8_tolower (const signed char *c, int l, char *out) {
                 return l;
             }
         }
+#endif
         memcpy (out, c, l);
         out[l] = 0;
         return l;
@@ -670,3 +688,22 @@ utfcasestr (const char *s1, const char *s2) {
     }
     return NULL;
 }
+
+void
+u8_lc_map_test (void) {
+    struct u8_case_map_t *lc;
+    lc = u8_lc_in_word_set ("Á", 2);
+    printf ("%s -> %s\n", lc->name, lc->lower);
+    lc = u8_lc_in_word_set ("É", 2);
+    printf ("%s -> %s\n", lc->name, lc->lower);
+    lc = u8_lc_in_word_set ("Í", 2);
+    printf ("%s -> %s\n", lc->name, lc->lower);
+    lc = u8_lc_in_word_set ("Ñ", 2);
+    printf ("%s -> %s\n", lc->name, lc->lower);
+    lc = u8_lc_in_word_set ("П", 2);
+    printf ("%s -> %s\n", lc->name, lc->lower);
+    lc = u8_lc_in_word_set ("Л", 2);
+    printf ("%s -> %s\n", lc->name, lc->lower);
+    lc = u8_lc_in_word_set ("А", 2);
+    printf ("%s -> %s\n", lc->name, lc->lower);
+}
diff --git a/utf8.h b/utf8.h
index 942f98e7..7ec66ef5 100644
--- a/utf8.h
+++ b/utf8.h
@@ -19,6 +19,9 @@
     by Jeff Bezanson
     placed in the public domain Fall 2005
 */
+#ifndef __UTF8_H
+#define __UTF8_H
+
 #include <stdint.h>
 #include <stdarg.h>
 
@@ -97,3 +100,5 @@ int u8_valid (const char  *str,
 
 const char *
 utfcasestr (const char *s1, const char *s2);
+
+#endif
author	Alexey Yakovenko <wakeroid@gmail.com>	2010-05-02 12:41:20 +0200
committer	Alexey Yakovenko <wakeroid@gmail.com>	2010-05-02 12:41:20 +0200
commit	2e53930ee9f06102ee39bf95b21775a563679d31 (patch)
tree	c9b65db0e54b576e91eed4e61dc1aafb2453aa3f
parent	95c829fb75ed7c4ecb933073f5ed59d2b647c7f4 (diff)