summaryrefslogtreecommitdiff
path: root/test/c/knucleotide.c
diff options
context:
space:
mode:
authorGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2006-09-17 12:04:56 +0000
committerGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2006-09-17 12:04:56 +0000
commit2ec5b3fb2ccb0120be641e077089f3da5e53d8a3 (patch)
treed1e6f6a9a3d99f0095dc96fe320214de68918158 /test/c/knucleotide.c
parente37d620f5b9b05e16563545cba9c538f8d31c746 (diff)
Davantage de tests
git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@104 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
Diffstat (limited to 'test/c/knucleotide.c')
-rw-r--r--test/c/knucleotide.c369
1 files changed, 369 insertions, 0 deletions
diff --git a/test/c/knucleotide.c b/test/c/knucleotide.c
new file mode 100644
index 0000000..f743892
--- /dev/null
+++ b/test/c/knucleotide.c
@@ -0,0 +1,369 @@
+/* The Computer Language Shootout
+ http://shootout.alioth.debian.org/
+
+ Contributed by Josh Goldfoot
+ to compile, use gcc -O3
+
+ This revision uses "simple_hash.h," available from
+ http://cvs.alioth.debian.org/cgi-bin/cvsweb.cgi/shootout/bench/Include/?cvsroot=shootout
+
+*/
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+enum { ht_num_primes = 28 };
+
+static unsigned long ht_prime_list[ht_num_primes] = {
+ 53ul, 97ul, 193ul, 389ul, 769ul,
+ 1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
+ 49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
+ 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
+ 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
+ 1610612741ul, 3221225473ul, 4294967291ul
+};
+
+struct ht_node {
+ char *key;
+ int val;
+ struct ht_node *next;
+};
+
+struct ht_ht {
+ int size;
+ struct ht_node **tbl;
+ int iter_index;
+ struct ht_node *iter_next;
+ int items;
+#ifdef HT_DEBUG
+ int collisions;
+#endif /* HT_DEBUG */
+};
+
+/*inline*/ int ht_val(struct ht_node *node) {
+ return(node->val);
+}
+
+/*inline*/ char *ht_key(struct ht_node *node) {
+ return(node->key);
+}
+
+/*inline*/ int ht_hashcode(struct ht_ht *ht, char *key) {
+ unsigned long val = 0;
+ for (; *key; ++key) val = 5 * val + *key;
+ return(val % ht->size);
+}
+
+struct ht_node *ht_node_create(char *key) {
+ char *newkey;
+ struct ht_node *node;
+ if ((node = (struct ht_node *)malloc(sizeof(struct ht_node))) == 0) {
+ perror("malloc ht_node");
+ exit(1);
+ }
+ if ((newkey = (char *)strdup(key)) == 0) {
+ perror("strdup newkey");
+ exit(1);
+ }
+ node->key = newkey;
+ node->val = 0;
+ node->next = (struct ht_node *)NULL;
+ return(node);
+}
+
+struct ht_ht *ht_create(int size) {
+ int i = 0;
+ struct ht_ht *ht = (struct ht_ht *)malloc(sizeof(struct ht_ht));
+ while (ht_prime_list[i] < size) { i++; }
+ ht->size = ht_prime_list[i];
+ ht->tbl = (struct ht_node **)calloc(ht->size, sizeof(struct ht_node *));
+ ht->iter_index = 0;
+ ht->iter_next = 0;
+ ht->items = 0;
+#ifdef HT_DEBUG
+ ht->collisions = 0;
+#endif /* HT_DEBUG */
+ return(ht);
+}
+
+void ht_destroy(struct ht_ht *ht) {
+ struct ht_node *cur, *next;
+ int i;
+#ifdef HT_DEBUG
+ int chain_len;
+ int max_chain_len = 0;
+ int density = 0;
+ fprintf(stderr, " HT: size %d\n", ht->size);
+ fprintf(stderr, " HT: items %d\n", ht->items);
+ fprintf(stderr, " HT: collisions %d\n", ht->collisions);
+#endif /* HT_DEBUG */
+ for (i=0; i<ht->size; i++) {
+ next = ht->tbl[i];
+#ifdef HT_DEBUG
+ if (next) {
+ density++;
+ }
+ chain_len = 0;
+#endif /* HT_DEBUG */
+ while (next) {
+ cur = next;
+ next = next->next;
+ free(cur->key);
+ free(cur);
+#ifdef HT_DEBUG
+ chain_len++;
+#endif /* HT_DEBUG */
+ }
+#ifdef HT_DEBUG
+ if (chain_len > max_chain_len)
+ max_chain_len = chain_len;
+#endif /* HT_DEBUG */
+ }
+ free(ht->tbl);
+ free(ht);
+#ifdef HT_DEBUG
+ fprintf(stderr, " HT: density %d\n", density);
+ fprintf(stderr, " HT: max chain len %d\n", max_chain_len);
+#endif /* HT_DEBUG */
+}
+
+/*inline*/ struct ht_node *ht_find(struct ht_ht *ht, char *key) {
+ int hash_code = ht_hashcode(ht, key);
+ struct ht_node *node = ht->tbl[hash_code];
+ while (node) {
+ if (strcmp(key, node->key) == 0) return(node);
+ node = node->next;
+ }
+ return((struct ht_node *)NULL);
+}
+
+/*inline*/ struct ht_node *ht_find_new(struct ht_ht *ht, char *key) {
+ int hash_code = ht_hashcode(ht, key);
+ struct ht_node *prev = 0, *node = ht->tbl[hash_code];
+ while (node) {
+ if (strcmp(key, node->key) == 0) return(node);
+ prev = node;
+ node = node->next;
+#ifdef HT_DEBUG
+ ht->collisions++;
+#endif /* HT_DEBUG */
+ }
+ ht->items++;
+ if (prev) {
+ return(prev->next = ht_node_create(key));
+ } else {
+ return(ht->tbl[hash_code] = ht_node_create(key));
+ }
+}
+
+/*
+ * Hash Table iterator data/functions
+ */
+/*inline*/ struct ht_node *ht_next(struct ht_ht *ht) {
+ unsigned long index;
+ struct ht_node *node = ht->iter_next;
+ if (node) {
+ ht->iter_next = node->next;
+ return(node);
+ } else {
+ while (ht->iter_index < ht->size) {
+ index = ht->iter_index++;
+ if (ht->tbl[index]) {
+ ht->iter_next = ht->tbl[index]->next;
+ return(ht->tbl[index]);
+ }
+ }
+ }
+ return((struct ht_node *)NULL);
+}
+
+/*inline*/ struct ht_node *ht_first(struct ht_ht *ht) {
+ ht->iter_index = 0;
+ ht->iter_next = (struct ht_node *)NULL;
+ return(ht_next(ht));
+}
+
+/*inline*/ int ht_count(struct ht_ht *ht) {
+ return(ht->items);
+}
+
+long
+hash_table_size (int fl, long buflen)
+{
+ long maxsize1, maxsize2;
+
+ maxsize1 = buflen - fl;
+ maxsize2 = 4;
+ while (--fl > 0 && maxsize2 < maxsize1)
+ maxsize2 = maxsize2 * 4;
+ if (maxsize1 < maxsize2)
+ return maxsize1;
+ return maxsize2;
+}
+
+struct ht_ht *
+generate_frequencies (int fl, char *buffer, long buflen)
+{
+ struct ht_ht *ht;
+ char *reader;
+ long i;
+ char nulled;
+
+ if (fl > buflen)
+ return NULL;
+
+ ht = ht_create (hash_table_size (fl, buflen));
+ for (i = 0; i < buflen - fl + 1; i++)
+ {
+ reader = &(buffer[i]);
+ nulled = reader[fl];
+ reader[fl] = 0x00;
+ ht_find_new (ht, reader)->val++;
+ reader[fl] = nulled;
+ }
+ return ht;
+}
+
+typedef struct ssorter
+{
+ char *string;
+ int num;
+} sorter;
+
+void
+write_frequencies (int fl, char *buffer, long buflen)
+{
+ struct ht_ht *ht;
+ long total, i, j, size;
+ struct ht_node *nd;
+ sorter *s;
+ sorter tmp;
+
+ ht = generate_frequencies (fl, buffer, buflen);
+ total = 0;
+ size = 0;
+ for (nd = ht_first (ht); nd != NULL; nd = ht_next (ht))
+ {
+ total = total + nd->val;
+ size++;
+ }
+ s = calloc (size, sizeof (sorter));
+ i = 0;
+ for (nd = ht_first (ht); nd != NULL; nd = ht_next (ht))
+ {
+ s[i].string = nd->key;
+ s[i++].num = nd->val;
+ }
+ for (i = 0; i < size - 1; i++)
+ for (j = i + 1; j < size; j++)
+ if (s[i].num < s[j].num)
+ {
+ memcpy (&tmp, &(s[i]), sizeof (sorter));
+ memcpy (&(s[i]), &(s[j]), sizeof (sorter));
+ memcpy (&(s[j]), &tmp, sizeof (sorter));
+ }
+ for (i = 0; i < size; i++)
+ printf ("%s %.3f\n", s[i].string, 100 * (float) s[i].num / total);
+ printf ("\n");
+ ht_destroy (ht);
+ free (s);
+}
+
+void
+write_count (char *searchFor, char *buffer, long buflen)
+{
+ struct ht_ht *ht;
+
+ ht = generate_frequencies (strlen (searchFor), buffer, buflen);
+ printf ("%d\t%s\n", ht_find_new (ht, searchFor)->val, searchFor);
+ ht_destroy (ht);
+}
+
+int
+main ()
+{
+ char c;
+ char *line, *buffer, *tmp, *x;
+ int i, linelen, nothree;
+ long buflen, seqlen;
+ FILE * f;
+
+ line = malloc (256);
+ if (!line)
+ return 2;
+ seqlen = 0;
+ nothree = 1;
+
+ f = fopen("Results/knucleotide-input.txt", "r");
+ if (f == NULL) return 2;
+
+ while (nothree && fgets (line, 255, f))
+ if (line[0] == '>' && line[1] == 'T' && line[2] == 'H')
+ nothree = 0;
+ free (line);
+
+ buflen = 10240;
+ buffer = malloc (buflen + 1);
+ if (!buffer)
+ return 2;
+ x = buffer;
+
+ while (fgets (x, 255, f))
+ {
+ linelen = strlen (x);
+ if (linelen)
+ {
+ if (x[linelen - 1] == '\n')
+ linelen--;
+ c = x[0];
+ if (c == '>')
+ break;
+ else if (c != ';')
+ {
+ seqlen = seqlen + linelen;
+ if (seqlen + 512 >= buflen)
+ {
+ buflen = buflen + 10240;
+ tmp = realloc (buffer, buflen + 1);
+ if (tmp == NULL)
+ return 2;
+ buffer = tmp;
+ x = &(buffer[seqlen]);
+ }
+ else
+ x = &(x[linelen]);
+ x[0] = 0;
+ }
+ }
+ }
+ for (i = 0; i < seqlen; i++)
+ buffer[i] = toupper (buffer[i]);
+ write_frequencies (1, buffer, seqlen);
+ write_frequencies (2, buffer, seqlen);
+ write_count ("GGT", buffer, seqlen);
+ write_count ("GGTA", buffer, seqlen);
+ write_count ("GGTATT", buffer, seqlen);
+ write_count ("GGTATTTTAATT", buffer, seqlen);
+ write_count ("GGTATTTTAATTTATAGT", buffer, seqlen);
+ free (buffer);
+ fclose (f);
+ return 0;
+}
+
+/**********
+ build & benchmark results
+
+BUILD COMMANDS FOR: knucleotide.gcc
+
+Fri Sep 15 13:56:07 PDT 2006
+
+/usr/bin/gcc -pipe -Wall -O3 -fomit-frame-pointer -funroll-loops -march=pentium4 knucleotide.c -o knucleotide.gcc_run
+
+=================================================================
+COMMAND LINE (%A is single numeric argument):
+
+knucleotide.gcc_run %A
+N=2500
+
+*******/