From 2e53930ee9f06102ee39bf95b21775a563679d31 Mon Sep 17 00:00:00 2001 From: Alexey Yakovenko Date: Sun, 2 May 2010 12:41:20 +0200 Subject: optimizations in search and sorting --- main.c | 2 + playlist.c | 133 ++++++++++++++++++++++++++++++++- u8_gperf.sh | 2 + u8_lc_map.h | 231 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ u8_lc_map.txt | 55 ++++++++++++++ utf8.c | 45 +++++++++++- utf8.h | 5 ++ 7 files changed, 467 insertions(+), 6 deletions(-) create mode 100755 u8_gperf.sh create mode 100644 u8_lc_map.h create mode 100644 u8_lc_map.txt diff --git a/main.c b/main.c index a00910e0..e26ce0bf 100644 --- a/main.c +++ b/main.c @@ -428,6 +428,8 @@ sigterm_handler (int sig) { int main (int argc, char *argv[]) { + //u8_lc_map_test (); + //return -1; setlocale (LC_NUMERIC, "C"); fprintf (stderr, "starting deadbeef " VERSION "\n"); srand (time (NULL)); diff --git a/playlist.c b/playlist.c index 133d50d6..c11ec2e7 100644 --- a/playlist.c +++ b/playlist.c @@ -26,6 +26,7 @@ #include #include #include +#include #ifndef __linux__ #define _POSIX_C_SOURCE #endif @@ -2425,11 +2426,13 @@ pl_format_title (playItem_t *it, int idx, char *s, int size, int id, const char } void -pl_sort (int iter, int id, const char *format, int ascending) { +pl_sort_bubble (int iter, int id, const char *format, int ascending) { if (id == DB_COLUMN_FILENUMBER) { return; } GLOBAL_LOCK; + struct timeval tm1; + gettimeofday (&tm1, NULL); int sorted = 0; int is_duration = 0; int is_track = 0; @@ -2439,8 +2442,9 @@ pl_sort (int iter, int id, const char *format, int ascending) { if (format && id == -1 && !strcmp (format, "%n")) { is_track = 1; } - + int niters = 0; do { + niters++; sorted = 1; playItem_t *it; for (it = playlist->head[iter]; it; it = it->next[iter]) { @@ -2498,6 +2502,131 @@ pl_sort (int iter, int id, const char *format, int ascending) { } } } while (!sorted); + struct timeval tm2; + gettimeofday (&tm2, NULL); + + int ms = (tm2.tv_sec*1000+tm2.tv_usec/1000) - (tm1.tv_sec*1000+tm1.tv_usec/1000); + trace ("sort time: %f seconds, %d iterations\n", ms / 1000.f, niters); + GLOBAL_UNLOCK; +} + +static int pl_sort_is_duration; +static int pl_sort_is_track; +static int pl_sort_ascending; +static int pl_sort_id; +static const char *pl_sort_format; + +static int +pl_sort_compare (playItem_t *a, playItem_t *b) { + int cmp; + // next = b + // it = a + if (pl_sort_is_duration) { + cmp = pl_sort_ascending ? b->_duration - a->_duration : a->_duration - b->_duration; + } + else if (pl_sort_is_track) { + const char *t; + t = pl_find_meta (a, "track"); + int a = t ? atoi (t) : -1; + t = pl_find_meta (b, "track"); + int b = t ? atoi (t) : -1; + cmp = pl_sort_ascending ? b - a : a - b; + } + else { + char title1[1024]; + char title2[1024]; + pl_format_title (a, -1, title1, sizeof (title1), pl_sort_id, pl_sort_format); + pl_format_title (b, -1, title2, sizeof (title2), pl_sort_id, pl_sort_format); + cmp = pl_sort_ascending ? strcmp (title2, title1) : strcmp (title1, title2); + } + return cmp; +} + +/* preform merge sort on the linked list */ +playItem_t *mergesort(playItem_t *head, int iter); +/* merge the lists.. */ +playItem_t *merge(playItem_t *head_one, playItem_t *head_two, int iter); + +/* preform merge sort on the linked list */ +playItem_t *mergesort(playItem_t *head, int iter) { + playItem_t *head_one; + playItem_t *head_two; + + if((head == NULL) || (head->next[iter] == NULL)) + return head; + + head_one = head; + head_two = head->next[iter]; + while((head_two != NULL) && (head_two->next[iter] != NULL)) { + head = head->next[iter]; + head_two = head->next[iter]->next[iter]; + } + head_two = head->next[iter]; + head->next[iter] = NULL; + + return merge(mergesort(head_one, iter), mergesort(head_two, iter), iter); +} + +/* merge the lists.. */ +playItem_t *merge(playItem_t *head_one, playItem_t *head_two, int iter) { + playItem_t *head_three; + + if(head_one == NULL) + return head_two; + + if(head_two == NULL) + return head_one; + + int cmp = pl_sort_compare (head_one, head_two); + if(cmp < 0) { + head_three = head_one; + head_three->next[iter] = merge(head_one->next[iter], head_two, iter); + } else { + head_three = head_two; + head_three->next[iter] = merge(head_one, head_two->next[iter], iter); + } + + return head_three; +} + +void +pl_sort (int iter, int id, const char *format, int ascending) { + if (id == DB_COLUMN_FILENUMBER || !playlist->head[iter] || !playlist->head[iter]->next[iter]) { + return; + } + GLOBAL_LOCK; + struct timeval tm1; + gettimeofday (&tm1, NULL); + pl_sort_ascending = ascending; + trace ("ascending: %d\n", ascending); + pl_sort_id = id; + pl_sort_format = format; + if (format && id == -1 && !strcmp (format, "%l")) { + pl_sort_is_duration = 1; + } + else { + pl_sort_is_duration = 0; + } + if (format && id == -1 && !strcmp (format, "%n")) { + pl_sort_is_track = 1; + } + else { + pl_sort_is_track = 0; + } + + playlist->head[iter] = mergesort (playlist->head[iter], iter); + // update `prev` pointers + playItem_t *prev = NULL; + for (playItem_t *it = playlist->head[iter]; it; it = it->next[iter]) { + it->prev[iter] = prev; + prev = it; + } + + struct timeval tm2; + gettimeofday (&tm2, NULL); + + int ms = (tm2.tv_sec*1000+tm2.tv_usec/1000) - (tm1.tv_sec*1000+tm1.tv_usec/1000); + trace ("sort time: %f seconds\n", ms / 1000.f); GLOBAL_UNLOCK; } diff --git a/u8_gperf.sh b/u8_gperf.sh new file mode 100755 index 00000000..3ecffc3e --- /dev/null +++ b/u8_gperf.sh @@ -0,0 +1,2 @@ +#!/bin/sh +gperf -c -t -H u8_lc_hash -N u8_lc_in_word_set u8_lc_map.txt >u8_lc_map.h diff --git a/u8_lc_map.h b/u8_lc_map.h new file mode 100644 index 00000000..c3c5da49 --- /dev/null +++ b/u8_lc_map.h @@ -0,0 +1,231 @@ +/* C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -c -t -H u8_lc_hash -N u8_lc_in_word_set u8_lc_map.txt */ +/* Computed positions: -k'1-2' */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +error "gperf generated tables don't work with this execution character set. Please report a bug to ." +#endif + +#line 1 "u8_lc_map.txt" +struct u8_case_map_t { + const char *name; + const char *lower; +}; + +#define TOTAL_KEYWORDS 49 +#define MIN_WORD_LENGTH 2 +#define MAX_WORD_LENGTH 2 +#define MIN_HASH_VALUE 0 +#define MAX_HASH_VALUE 67 +/* maximum key range = 68, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +/*ARGSUSED*/ +static unsigned int +u8_lc_hash (str, len) + register const char *str; + register unsigned int len; +{ + static unsigned char asso_values[] = + { + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 33, 60, + 68, 68, 28, 23, 18, 13, 8, 3, 62, 68, + 68, 61, 68, 68, 62, 50, 57, 40, 52, 47, + 30, 42, 20, 37, 10, 32, 0, 27, 22, 17, + 12, 7, 2, 61, 56, 51, 46, 41, 36, 31, + 26, 21, 16, 11, 6, 1, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 5, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 0, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68 + }; + return asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]]; +} + +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +struct u8_case_map_t * +u8_lc_in_word_set (str, len) + register const char *str; + register unsigned int len; +{ + static struct u8_case_map_t wordlist[] = + { +#line 35 "u8_lc_map.txt" + {"\320\234", "м"}, +#line 54 "u8_lc_map.txt" + {"\320\257", "я"}, +#line 41 "u8_lc_map.txt" + {"\320\242", "т"}, + {""}, {""}, +#line 12 "u8_lc_map.txt" + {"\303\234", "ü"}, +#line 53 "u8_lc_map.txt" + {"\320\256", "ю"}, +#line 40 "u8_lc_map.txt" + {"\320\241", "с"}, +#line 7 "u8_lc_map.txt" + {"\303\211", "é"}, + {""}, +#line 33 "u8_lc_map.txt" + {"\320\232", "к"}, +#line 52 "u8_lc_map.txt" + {"\320\255", "э"}, +#line 38 "u8_lc_map.txt" + {"\320\240", "р"}, +#line 20 "u8_lc_map.txt" + {"\303\210", "è"}, + {""}, +#line 11 "u8_lc_map.txt" + {"\303\232", "ú"}, +#line 51 "u8_lc_map.txt" + {"\320\254", "ь"}, +#line 39 "u8_lc_map.txt" + {"\320\237", "п"}, +#line 19 "u8_lc_map.txt" + {"\303\207", "ç"}, + {""}, +#line 31 "u8_lc_map.txt" + {"\320\230", "и"}, +#line 50 "u8_lc_map.txt" + {"\320\253", "ы"}, +#line 37 "u8_lc_map.txt" + {"\320\236", "о"}, +#line 16 "u8_lc_map.txt" + {"\303\206", "æ"}, + {""}, +#line 17 "u8_lc_map.txt" + {"\303\230", "ø"}, +#line 49 "u8_lc_map.txt" + {"\320\252", "ъ"}, +#line 36 "u8_lc_map.txt" + {"\320\235", "н"}, +#line 15 "u8_lc_map.txt" + {"\303\205", "å"}, + {""}, +#line 29 "u8_lc_map.txt" + {"\320\226", "ж"}, +#line 48 "u8_lc_map.txt" + {"\320\251", "щ"}, +#line 34 "u8_lc_map.txt" + {"\320\233", "л"}, +#line 13 "u8_lc_map.txt" + {"\303\204", "ä"}, + {""}, +#line 14 "u8_lc_map.txt" + {"\303\226", "ö"}, +#line 47 "u8_lc_map.txt" + {"\320\250", "ш"}, +#line 32 "u8_lc_map.txt" + {"\320\231", "й"}, +#line 18 "u8_lc_map.txt" + {"\303\200", "à"}, + {""}, +#line 25 "u8_lc_map.txt" + {"\320\223", "г"}, +#line 46 "u8_lc_map.txt" + {"\320\247", "ч"}, +#line 30 "u8_lc_map.txt" + {"\320\227", "з"}, + {""}, {""}, +#line 10 "u8_lc_map.txt" + {"\303\223", "ó"}, +#line 45 "u8_lc_map.txt" + {"\320\246", "ц"}, +#line 27 "u8_lc_map.txt" + {"\320\225", "е"}, + {""}, {""}, +#line 23 "u8_lc_map.txt" + {"\320\221", "б"}, +#line 44 "u8_lc_map.txt" + {"\320\245", "х"}, +#line 26 "u8_lc_map.txt" + {"\320\224", "д"}, + {""}, {""}, +#line 9 "u8_lc_map.txt" + {"\303\221", "ñ"}, +#line 43 "u8_lc_map.txt" + {"\320\244", "ф"}, +#line 24 "u8_lc_map.txt" + {"\320\222", "в"}, + {""}, {""}, +#line 28 "u8_lc_map.txt" + {"\320\201", "ё"}, +#line 42 "u8_lc_map.txt" + {"\320\243", "у"}, +#line 22 "u8_lc_map.txt" + {"\320\220", "а"}, + {""}, {""}, +#line 6 "u8_lc_map.txt" + {"\303\201", "á"}, +#line 8 "u8_lc_map.txt" + {"\303\215", "í"}, +#line 21 "u8_lc_map.txt" + {"\303\212", "ê"} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = u8_lc_hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0') + return &wordlist[key]; + } + } + return 0; +} +#line 55 "u8_lc_map.txt" + diff --git a/u8_lc_map.txt b/u8_lc_map.txt new file mode 100644 index 00000000..43b6927f --- /dev/null +++ b/u8_lc_map.txt @@ -0,0 +1,55 @@ +struct u8_case_map_t { + const char *name; + const char *lower; +}; +%% +Á, "á" +É, "é" +Í, "í" +Ñ, "ñ" +Ó, "ó" +Ú, "ú" +Ü, "ü" +Ä, "ä" +Ö, "ö" +Å, "å" +Æ, "æ" +Ø, "ø" +À, "à" +Ç, "ç" +È, "è" +Ê, "ê" +А, "а" +Б, "б" +В, "в" +Г, "г" +Д, "д" +Е, "е" +Ё, "ё" +Ж, "ж" +З, "з" +И, "и" +Й, "й" +К, "к" +Л, "л" +М, "м" +Н, "н" +О, "о" +Р, "р" +П, "п" +С, "с" +Т, "т" +У, "у" +Ф, "ф" +Х, "х" +Ц, "ц" +Ч, "ч" +Ш, "ш" +Щ, "щ" +Ъ, "ъ" +Ы, "ы" +Ь, "ь" +Э, "э" +Ю, "ю" +Я, "я" +%% diff --git a/utf8.c b/utf8.c index b55bc0de..6e6bb100 100644 --- a/utf8.c +++ b/utf8.c @@ -26,6 +26,7 @@ //#include #include "ctype.h" #include "utf8.h" +#include "u8_lc_map.h" static const uint32_t offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, @@ -599,17 +600,33 @@ int u8_valid (const char *str, return 1; } -static const char lowerchars[] = "záéíñóúüäöåæøàçèéêабвгдеёжзийклмнорпстуфхцчшщъыьэюя"; -static const char upperchars[] = "ZÁÉÍÑÓÚÜÄÖÅÆØÀÇÈÉÊАБВГДЕЁЖЗИЙКЛМНОРПСТУФХЦЧШЩЪЫЬЭЮЯ"; +#if 0 +static const char lowerchars[] = "áéíñóúüäöåæøàçèêабвгдеёжзийклмнорпстуфхцчшщъыьэюя"; +static const char upperchars[] = "ÁÉÍÑÓÚÜÄÖÅÆØÀÇÈÊАБВГДЕЁЖЗИЙКЛМНОРПСТУФХЦЧШЩЪЫЬЭЮЯ"; +#endif int u8_tolower (const signed char *c, int l, char *out) { - if (*c > 0) { - *out = tolower (*c); + if (*c >= 65 && *c <= 90) { + *out = *c + 0x20;//tolower (*c); + out[1] = 0; + return 1; + } + else if (*c > 0) { + *out = *c; out[1] = 0; return 1; } else { +#if 1 + struct u8_case_map_t *lc = u8_lc_in_word_set (c, l); + if (lc) { + int ll = 2;//strlen (lc->lower); + memcpy (out, lc->lower, ll); + out[ll] = 0; + return ll; + } +#else for (int i = 0; i < sizeof (upperchars)-l; i++) { if (!memcmp (upperchars+i, c, l)) { // found! @@ -618,6 +635,7 @@ u8_tolower (const signed char *c, int l, char *out) { return l; } } +#endif memcpy (out, c, l); out[l] = 0; return l; @@ -670,3 +688,22 @@ utfcasestr (const char *s1, const char *s2) { } return NULL; } + +void +u8_lc_map_test (void) { + struct u8_case_map_t *lc; + lc = u8_lc_in_word_set ("Á", 2); + printf ("%s -> %s\n", lc->name, lc->lower); + lc = u8_lc_in_word_set ("É", 2); + printf ("%s -> %s\n", lc->name, lc->lower); + lc = u8_lc_in_word_set ("Í", 2); + printf ("%s -> %s\n", lc->name, lc->lower); + lc = u8_lc_in_word_set ("Ñ", 2); + printf ("%s -> %s\n", lc->name, lc->lower); + lc = u8_lc_in_word_set ("П", 2); + printf ("%s -> %s\n", lc->name, lc->lower); + lc = u8_lc_in_word_set ("Л", 2); + printf ("%s -> %s\n", lc->name, lc->lower); + lc = u8_lc_in_word_set ("А", 2); + printf ("%s -> %s\n", lc->name, lc->lower); +} diff --git a/utf8.h b/utf8.h index 942f98e7..7ec66ef5 100644 --- a/utf8.h +++ b/utf8.h @@ -19,6 +19,9 @@ by Jeff Bezanson placed in the public domain Fall 2005 */ +#ifndef __UTF8_H +#define __UTF8_H + #include #include @@ -97,3 +100,5 @@ int u8_valid (const char *str, const char * utfcasestr (const char *s1, const char *s2); + +#endif -- cgit v1.2.3