aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--src/core/lib/transport/static_metadata.c62
-rwxr-xr-xtools/codegen/core/gen_static_metadata.py76
-rw-r--r--tools/codegen/core/perfect/.gitignore7
-rwxr-xr-xtools/codegen/core/perfect/build.sh4
-rw-r--r--tools/codegen/core/perfect/lookupa.c240
-rw-r--r--tools/codegen/core/perfect/lookupa.h24
-rw-r--r--tools/codegen/core/perfect/perfect.c1367
-rw-r--r--tools/codegen/core/perfect/perfect.h132
-rw-r--r--tools/codegen/core/perfect/perfhex.c1308
-rw-r--r--tools/codegen/core/perfect/recycle.c87
-rw-r--r--tools/codegen/core/perfect/recycle.h65
-rwxr-xr-xtools/codegen/core/perfect/run.sh6
-rw-r--r--tools/codegen/core/perfect/standard.h57
13 files changed, 57 insertions, 3378 deletions
diff --git a/src/core/lib/transport/static_metadata.c b/src/core/lib/transport/static_metadata.c
index abd5a91eff..ee1e22b2b2 100644
--- a/src/core/lib/transport/static_metadata.c
+++ b/src/core/lib/transport/static_metadata.c
@@ -429,47 +429,41 @@ uintptr_t grpc_static_mdelem_user_data[GRPC_STATIC_MDELEM_COUNT] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 4, 4, 6, 6, 8, 8};
-#define ELEMS_PHASHLEN 0x40
-#define ELEMS_PHASHNKEYS 81
-#define ELEMS_PHASHRANGE 128
-#define ELEMS_PHASHSALT 0x9e3779b9
-
-static const uint8_t elems_tab[] = {
- 20, 1, 0, 61, 61, 34, 10, 16, 0, 0, 0, 0, 34, 61, 0, 1,
- 0, 0, 0, 61, 0, 88, 0, 4, 0, 47, 0, 47, 12, 7, 0, 16,
- 51, 87, 76, 4, 79, 10, 70, 47, 76, 61, 71, 88, 0, 88, 0, 47,
- 0, 16, 0, 83, 0, 57, 0, 75, 0, 42, 0, 90, 0, 42, 70, 0,
-};
-
-static uint32_t elems_phash(uint32_t val) {
- val += (uint32_t)-11;
-
- uint32_t a, b, rsl;
-
- b = (val & 0x3f);
- a = ((val << 18) >> 26);
- rsl = (a ^ elems_tab[b]);
- return rsl;
+static const int8_t elems_r[] = {
+ 10, 8, -3, 0, 9, 21, -76, 22, 0, 10, -7, 20, 0, 19, 18, 17,
+ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, -49, -50, 16, -52, -53, -54, -54, -55, -56, -57, 0, 38, 37, 36, 35,
+ 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19,
+ 18, 17, 16, 15, 14, 13, 12, 15, 14, 13, 12, 11, 10, 9, 8, 0};
+static uint32_t elems_phash(uint32_t i) {
+ i -= 42;
+ uint32_t x = i % 96;
+ uint32_t y = i / 96;
+ return x + (uint32_t)elems_r[y];
}
static const uint16_t elem_keys[] = {
- 138, 522, 714, 5116, 1098, 430, 5802, 232, 8840, 913, 240, 8644,
- 231, 8742, 7762, 1392, 42, 5410, 4822, 5998, 139, 1490, 5900, 7664,
- 6292, 8448, 6684, 7272, 7370, 8350, 8154, 7958, 7566, 912, 9036, 7860,
- 6488, 8546, 1111, 9134, 712, 5214, 132, 1074, 1010, 5312, 314, 242,
- 8252, 4951, 8938, 43, 7076, 6096, 6586, 6194, 1294, 1076, 5606, 1588,
- 5704, 244, 911, 5508, 6390, 7174, 6880, 1077, 713, 1009, 241, 8056,
- 1075, 6782, 7468, 4920, 243, 429, 431, 1011, 6978, 0, 0, 0,
+ 1009, 1010, 1011, 240, 241, 242, 243, 244, 138, 139, 42, 43,
+ 429, 430, 431, 911, 912, 913, 712, 713, 1098, 522, 714, 1294,
+ 1392, 1490, 1588, 4822, 4920, 4951, 5116, 5214, 5312, 1111, 5410, 5508,
+ 5606, 5704, 5802, 5900, 5998, 6096, 6194, 6292, 6390, 6488, 6586, 6684,
+ 6782, 6880, 6978, 7076, 7174, 7272, 7370, 7468, 7566, 7664, 7762, 7860,
+ 7958, 8056, 8154, 8252, 8350, 1074, 1075, 1076, 1077, 8448, 8546, 8644,
+ 8742, 8840, 8938, 9036, 9134, 314, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 132, 231, 232, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0};
+ 0};
static const uint8_t elem_idxs[] = {
- 15, 6, 2, 27, 41, 12, 34, 10, 69, 5, 19, 67, 9, 68, 58, 48, 17,
- 30, 24, 36, 16, 55, 35, 57, 39, 65, 44, 51, 52, 64, 62, 60, 54, 4,
- 72, 59, 42, 66, 7, 73, 0, 28, 8, 76, 77, 29, 14, 21, 63, 26, 71,
- 18, 49, 37, 43, 38, 70, 79, 32, 56, 33, 23, 3, 31, 40, 50, 46, 80,
- 1, 74, 20, 61, 78, 45, 53, 25, 22, 11, 13, 75, 47};
+ 74, 77, 75, 19, 20, 21, 22, 23, 15, 16, 17, 18, 11, 12, 13,
+ 3, 4, 5, 0, 1, 41, 6, 2, 70, 48, 55, 56, 24, 25, 26,
+ 27, 28, 29, 7, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+ 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 57, 58, 59,
+ 60, 61, 62, 63, 64, 76, 78, 79, 80, 65, 66, 67, 68, 69, 71,
+ 72, 73, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 8, 9, 10};
grpc_mdelem grpc_static_mdelem_for_static_strings(int a, int b) {
if (a == -1 || b == -1) return GRPC_MDNULL;
diff --git a/tools/codegen/core/gen_static_metadata.py b/tools/codegen/core/gen_static_metadata.py
index 18f71646f8..b7c9533602 100755
--- a/tools/codegen/core/gen_static_metadata.py
+++ b/tools/codegen/core/gen_static_metadata.py
@@ -36,6 +36,7 @@ import os
import sys
import subprocess
import re
+import perfection
# configuration: a list of either strings or 2-tuples of strings
# a single string represents a static grpc_mdstr
@@ -407,59 +408,40 @@ def offset_trials(mink):
yield mul * i
def perfect_hash(keys, name):
- ok = False
- print '***********'
- print keys
- cmd = '%s/perfect/build.sh' % (os.path.dirname(sys.argv[0]))
- subprocess.check_call(cmd, shell=True)
- for offset in offset_trials(min(keys)):
- tmp = open('/tmp/keys.txt', 'w')
- offset_keys = [x + offset for x in keys]
- print offset_keys
- tmp.write(''.join('%d\n' % x for x in offset_keys))
- tmp.close()
- cmd = '%s/perfect/run.sh %s -dms' % (os.path.dirname(sys.argv[0]), tmp.name)
- out = subprocess.check_output(cmd, shell=True)
- if 'fatal error' not in out:
- ok = True
- break
- assert ok, "Failed to find hash of keys in /tmp/keys.txt"
-
- code = ''
-
- results = {}
- with open('%s/perfect/phash.h' % os.path.dirname(sys.argv[0])) as f:
- txt = f.read()
- for var in ('PHASHLEN', 'PHASHNKEYS', 'PHASHRANGE', 'PHASHSALT'):
- val = re.search(r'#define %s ([0-9a-zA-Z]+)' % var, txt).group(1)
- code += '#define %s_%s %s\n' % (name.upper(), var, val)
- results[var] = val
- code += '\n'
- pycode = 'def f(val):\n'
- pycode += ' val += %d\n' % offset
- with open('%s/perfect/phash.c' % os.path.dirname(sys.argv[0])) as f:
- txt = f.read()
- tabdata = re.search(r'ub1 tab\[\] = \{([^}]+)\}', txt, re.MULTILINE).group(1)
- code += 'static const uint8_t %s_tab[] = {%s};\n\n' % (name, tabdata)
- func_body = re.search(r'ub4 phash\(val\)\nub4 val;\n\{([^}]+)\}', txt, re.MULTILINE).group(1).replace('ub4', 'uint32_t')
- code += 'static uint32_t %s_phash(uint32_t val) {\nval += (uint32_t)%d;\n%s}\n' % (name,
- offset, func_body.replace('tab', '%s_tab' % name))
- pycode += ' tab=(%s)' % tabdata.replace('\n', '')
- pycode += '\n'.join(' %s' % s.strip() for s in func_body.splitlines()[2:])
- g = {}
- exec pycode in g
- pyfunc = g['f']
-
- results['code'] = code
- results['pyfunc'] = pyfunc
- return results
+ p = perfection.hash_parameters(keys)
+ def f(i, p=p):
+ i += p.offset
+ x = i % p.t
+ y = i / p.t
+ return x + p.r[y]
+ return {
+ 'PHASHRANGE': p.t - 1 + max(p.r),
+ 'PHASHNKEYS': len(p.slots),
+ 'pyfunc': f,
+ 'code': """
+static const int8_t %(name)s_r[] = {%(r)s};
+static uint32_t %(name)s_phash(uint32_t i) {
+ i %(offset_sign)s= %(offset)d;
+ uint32_t x = i %% %(t)d;
+ uint32_t y = i / %(t)d;
+ return x + (uint32_t)%(name)s_r[y];
+}
+ """ % {
+ 'name': name,
+ 'r': ','.join('%d' % (r if r is not None else 0) for r in p.r),
+ 't': p.t,
+ 'offset': abs(p.offset),
+ 'offset_sign': '+' if p.offset > 0 else '-'
+ }
+ }
+
elem_keys = [str_idx(elem[0]) * len(all_strs) + str_idx(elem[1]) for elem in all_elems]
elem_hash = perfect_hash(elem_keys, "elems")
print >>C, elem_hash['code']
keys = [0] * int(elem_hash['PHASHRANGE'])
-idxs = [-1] * int(elem_hash['PHASHNKEYS'])
+idxs = [255] * int(elem_hash['PHASHNKEYS'])
for i, k in enumerate(elem_keys):
h = elem_hash['pyfunc'](k)
assert keys[h] == 0
diff --git a/tools/codegen/core/perfect/.gitignore b/tools/codegen/core/perfect/.gitignore
deleted file mode 100644
index c1489f0819..0000000000
--- a/tools/codegen/core/perfect/.gitignore
+++ /dev/null
@@ -1,7 +0,0 @@
-perfect
-*.o
-phash.h
-phash.c
-compile.txt
-hash.txt
-
diff --git a/tools/codegen/core/perfect/build.sh b/tools/codegen/core/perfect/build.sh
deleted file mode 100755
index 139556ea48..0000000000
--- a/tools/codegen/core/perfect/build.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-set -e
-cd $(dirname $0)
-gcc -o perfect perfect.c recycle.c lookupa.c perfhex.c 2> compile.txt
diff --git a/tools/codegen/core/perfect/lookupa.c b/tools/codegen/core/perfect/lookupa.c
deleted file mode 100644
index c122c4f107..0000000000
--- a/tools/codegen/core/perfect/lookupa.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
---------------------------------------------------------------------
-lookupa.c, by Bob Jenkins, December 1996. Same as lookup2.c
-Use this code however you wish. Public Domain. No warranty.
-Source is http://burtleburtle.net/bob/c/lookupa.c
---------------------------------------------------------------------
-*/
-#ifndef STANDARD
-#include "standard.h"
-#endif
-#ifndef LOOKUPA
-#include "lookupa.h"
-#endif
-
-/*
---------------------------------------------------------------------
-mix -- mix 3 32-bit values reversibly.
-For every delta with one or two bit set, and the deltas of all three
- high bits or all three low bits, whether the original value of a,b,c
- is almost all zero or is uniformly distributed,
-* If mix() is run forward or backward, at least 32 bits in a,b,c
- have at least 1/4 probability of changing.
-* If mix() is run forward, every bit of c will change between 1/3 and
- 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
-mix() was built out of 36 single-cycle latency instructions in a
- structure that could supported 2x parallelism, like so:
- a -= b;
- a -= c; x = (c>>13);
- b -= c; a ^= x;
- b -= a; x = (a<<8);
- c -= a; b ^= x;
- c -= b; x = (b>>13);
- ...
- Unfortunately, superscalar Pentiums and Sparcs can't take advantage
- of that parallelism. They've also turned some of those single-cycle
- latency instructions into multi-cycle latency instructions. Still,
- this is the fastest good hash I could find. There were about 2^^68
- to choose from. I only looked at a billion or so.
---------------------------------------------------------------------
-*/
-#define mix(a,b,c) \
-{ \
- a -= b; a -= c; a ^= (c>>13); \
- b -= c; b -= a; b ^= (a<<8); \
- c -= a; c -= b; c ^= (b>>13); \
- a -= b; a -= c; a ^= (c>>12); \
- b -= c; b -= a; b ^= (a<<16); \
- c -= a; c -= b; c ^= (b>>5); \
- a -= b; a -= c; a ^= (c>>3); \
- b -= c; b -= a; b ^= (a<<10); \
- c -= a; c -= b; c ^= (b>>15); \
-}
-
-/*
---------------------------------------------------------------------
-lookup() -- hash a variable-length key into a 32-bit value
- k : the key (the unaligned variable-length array of bytes)
- len : the length of the key, counting by bytes
- level : can be any 4-byte value
-Returns a 32-bit value. Every bit of the key affects every bit of
-the return value. Every 1-bit and 2-bit delta achieves avalanche.
-About 6len+35 instructions.
-
-The best hash table sizes are powers of 2. There is no need to do
-mod a prime (mod is sooo slow!). If you need less than 32 bits,
-use a bitmask. For example, if you need only 10 bits, do
- h = (h & hashmask(10));
-In which case, the hash table should have hashsize(10) elements.
-
-If you are hashing n strings (ub1 **)k, do it like this:
- for (i=0, h=0; i<n; ++i) h = lookup( k[i], len[i], h);
-
-By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
-code any way you wish, private, educational, or commercial.
-
-See http://burtleburtle.net/bob/hash/evahash.html
-Use for hash table lookup, or anything where one collision in 2^32 is
-acceptable. Do NOT use for cryptographic purposes.
---------------------------------------------------------------------
-*/
-
-ub4 lookup( k, length, level)
-register ub1 *k; /* the key */
-register ub4 length; /* the length of the key */
-register ub4 level; /* the previous hash, or an arbitrary value */
-{
- register ub4 a,b,c,len;
-
- /* Set up the internal state */
- len = length;
- a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
- c = level; /* the previous hash value */
-
- /*---------------------------------------- handle most of the key */
- while (len >= 12)
- {
- a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
- b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
- c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
- mix(a,b,c);
- k += 12; len -= 12;
- }
-
- /*------------------------------------- handle the last 11 bytes */
- c += length;
- switch(len) /* all the case statements fall through */
- {
- case 11: c+=((ub4)k[10]<<24);
- case 10: c+=((ub4)k[9]<<16);
- case 9 : c+=((ub4)k[8]<<8);
- /* the first byte of c is reserved for the length */
- case 8 : b+=((ub4)k[7]<<24);
- case 7 : b+=((ub4)k[6]<<16);
- case 6 : b+=((ub4)k[5]<<8);
- case 5 : b+=k[4];
- case 4 : a+=((ub4)k[3]<<24);
- case 3 : a+=((ub4)k[2]<<16);
- case 2 : a+=((ub4)k[1]<<8);
- case 1 : a+=k[0];
- /* case 0: nothing left to add */
- }
- mix(a,b,c);
- /*-------------------------------------------- report the result */
- return c;
-}
-
-
-/*
---------------------------------------------------------------------
-mixc -- mixc 8 4-bit values as quickly and thoroughly as possible.
-Repeating mix() three times achieves avalanche.
-Repeating mix() four times eliminates all funnels and all
- characteristics stronger than 2^{-11}.
---------------------------------------------------------------------
-*/
-#define mixc(a,b,c,d,e,f,g,h) \
-{ \
- a^=b<<11; d+=a; b+=c; \
- b^=c>>2; e+=b; c+=d; \
- c^=d<<8; f+=c; d+=e; \
- d^=e>>16; g+=d; e+=f; \
- e^=f<<10; h+=e; f+=g; \
- f^=g>>4; a+=f; g+=h; \
- g^=h<<8; b+=g; h+=a; \
- h^=a>>9; c+=h; a+=b; \
-}
-
-/*
---------------------------------------------------------------------
-checksum() -- hash a variable-length key into a 256-bit value
- k : the key (the unaligned variable-length array of bytes)
- len : the length of the key, counting by bytes
- state : an array of CHECKSTATE 4-byte values (256 bits)
-The state is the checksum. Every bit of the key affects every bit of
-the state. There are no funnels. About 112+6.875len instructions.
-
-If you are hashing n strings (ub1 **)k, do it like this:
- for (i=0; i<8; ++i) state[i] = 0x9e3779b9;
- for (i=0, h=0; i<n; ++i) checksum( k[i], len[i], state);
-
-See http://burtleburtle.net/bob/hash/evahash.html
-Use to detect changes between revisions of documents, assuming nobody
-is trying to cause collisions. Do NOT use for cryptography.
---------------------------------------------------------------------
-*/
-void checksum( k, len, state)
-register ub1 *k;
-register ub4 len;
-register ub4 *state;
-{
- register ub4 a,b,c,d,e,f,g,h,length;
-
- /* Use the length and level; add in the golden ratio. */
- length = len;
- a=state[0]; b=state[1]; c=state[2]; d=state[3];
- e=state[4]; f=state[5]; g=state[6]; h=state[7];
-
- /*---------------------------------------- handle most of the key */
- while (len >= 32)
- {
- a += (k[0] +(k[1]<<8) +(k[2]<<16) +(k[3]<<24));
- b += (k[4] +(k[5]<<8) +(k[6]<<16) +(k[7]<<24));
- c += (k[8] +(k[9]<<8) +(k[10]<<16)+(k[11]<<24));
- d += (k[12]+(k[13]<<8)+(k[14]<<16)+(k[15]<<24));
- e += (k[16]+(k[17]<<8)+(k[18]<<16)+(k[19]<<24));
- f += (k[20]+(k[21]<<8)+(k[22]<<16)+(k[23]<<24));
- g += (k[24]+(k[25]<<8)+(k[26]<<16)+(k[27]<<24));
- h += (k[28]+(k[29]<<8)+(k[30]<<16)+(k[31]<<24));
- mixc(a,b,c,d,e,f,g,h);
- mixc(a,b,c,d,e,f,g,h);
- mixc(a,b,c,d,e,f,g,h);
- mixc(a,b,c,d,e,f,g,h);
- k += 32; len -= 32;
- }
-
- /*------------------------------------- handle the last 31 bytes */
- h += length;
- switch(len)
- {
- case 31: h+=(k[30]<<24);
- case 30: h+=(k[29]<<16);
- case 29: h+=(k[28]<<8);
- case 28: g+=(k[27]<<24);
- case 27: g+=(k[26]<<16);
- case 26: g+=(k[25]<<8);
- case 25: g+=k[24];
- case 24: f+=(k[23]<<24);
- case 23: f+=(k[22]<<16);
- case 22: f+=(k[21]<<8);
- case 21: f+=k[20];
- case 20: e+=(k[19]<<24);
- case 19: e+=(k[18]<<16);
- case 18: e+=(k[17]<<8);
- case 17: e+=k[16];
- case 16: d+=(k[15]<<24);
- case 15: d+=(k[14]<<16);
- case 14: d+=(k[13]<<8);
- case 13: d+=k[12];
- case 12: c+=(k[11]<<24);
- case 11: c+=(k[10]<<16);
- case 10: c+=(k[9]<<8);
- case 9 : c+=k[8];
- case 8 : b+=(k[7]<<24);
- case 7 : b+=(k[6]<<16);
- case 6 : b+=(k[5]<<8);
- case 5 : b+=k[4];
- case 4 : a+=(k[3]<<24);
- case 3 : a+=(k[2]<<16);
- case 2 : a+=(k[1]<<8);
- case 1 : a+=k[0];
- }
- mixc(a,b,c,d,e,f,g,h);
- mixc(a,b,c,d,e,f,g,h);
- mixc(a,b,c,d,e,f,g,h);
- mixc(a,b,c,d,e,f,g,h);
-
- /*-------------------------------------------- report the result */
- state[0]=a; state[1]=b; state[2]=c; state[3]=d;
- state[4]=e; state[5]=f; state[6]=g; state[7]=h;
-}
diff --git a/tools/codegen/core/perfect/lookupa.h b/tools/codegen/core/perfect/lookupa.h
deleted file mode 100644
index 0b27db680d..0000000000
--- a/tools/codegen/core/perfect/lookupa.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
-------------------------------------------------------------------------------
-By Bob Jenkins, September 1996.
-lookupa.h, a hash function for table lookup, same function as lookup.c.
-Use this code in any way you wish. Public Domain. It has no warranty.
-Source is http://burtleburtle.net/bob/c/lookupa.h
-------------------------------------------------------------------------------
-*/
-
-#ifndef STANDARD
-#include "standard.h"
-#endif
-
-#ifndef LOOKUPA
-#define LOOKUPA
-
-#define CHECKSTATE 8
-#define hashsize(n) ((ub4)1<<(n))
-#define hashmask(n) (hashsize(n)-1)
-
-ub4 lookup(/*_ ub1 *k, ub4 length, ub4 level _*/);
-void checksum(/*_ ub1 *k, ub4 length, ub4 *state _*/);
-
-#endif /* LOOKUPA */
diff --git a/tools/codegen/core/perfect/perfect.c b/tools/codegen/core/perfect/perfect.c
deleted file mode 100644
index 67fd2fd262..0000000000
--- a/tools/codegen/core/perfect/perfect.c
+++ /dev/null
@@ -1,1367 +0,0 @@
-/*
-------------------------------------------------------------------------------
-perfect.c: code to generate code for a hash for perfect hashing.
-(c) Bob Jenkins, September 1996, December 1999
-You may use this code in any way you wish, and it is free. No warranty.
-I hereby place this in the public domain.
-Source is http://burtleburtle.net/bob/c/perfect.c
-
-This generates a minimal perfect hash function. That means, given a
-set of n keys, this determines a hash function that maps each of
-those keys into a value in 0..n-1 with no collisions.
-
-The perfect hash function first uses a normal hash function on the key
-to determine (a,b) such that the pair (a,b) is distinct for all
-keys, then it computes a^scramble[tab[b]] to get the final perfect hash.
-tab[] is an array of 1-byte values and scramble[] is a 256-term array of
-2-byte or 4-byte values. If there are n keys, the length of tab[] is a
-power of two between n/3 and n.
-
-I found the idea of computing distinct (a,b) values in "Practical minimal
-perfect hash functions for large databases", Fox, Heath, Chen, and Daoud,
-Communications of the ACM, January 1992. They found the idea in Chichelli
-(CACM Jan 1980). Beyond that, our methods differ.
-
-The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in
-0..*blen*-1. A fast hash function determines both a and b
-simultaneously. Any decent hash function is likely to produce
-hashes so that (a,b) is distinct for all pairs. I try the hash
-using different values of *salt* until all pairs are distinct.
-
-The final hash is (a XOR scramble[tab[b]]). *scramble* is a
-predetermined mapping of 0..255 into 0..smax-1. *tab* is an
-array that we fill in in such a way as to make the hash perfect.
-
-First we fill in all values of *tab* that are used by more than one
-key. We try all possible values for each position until one works.
-
-This leaves m unmapped keys and m values that something could hash to.
-If you treat unmapped keys as lefthand nodes and unused hash values
-as righthand nodes, and draw a line connecting each key to each hash
-value it could map to, you get a bipartite graph. We attempt to
-find a perfect matching in this graph. If we succeed, we have
-determined a perfect hash for the whole set of keys.
-
-*scramble* is used because (a^tab[i]) clusters keys around *a*.
-------------------------------------------------------------------------------
-*/
-
-#ifndef STANDARD
-#include "standard.h"
-#endif
-#ifndef LOOKUPA
-#include "lookupa.h"
-#endif
-#ifndef RECYCLE
-#include "recycle.h"
-#endif
-#ifndef PERFECT
-#include "perfect.h"
-#endif
-
-/*
-------------------------------------------------------------------------------
-Find the mapping that will produce a perfect hash
-------------------------------------------------------------------------------
-*/
-
-/* return the ceiling of the log (base 2) of val */
-ub4 mylog2(val)
-ub4 val;
-{
- ub4 i;
- for (i=0; ((ub4)1<<i) < val; ++i)
- ;
- return i;
-}
-
-/* compute p(x), where p is a permutation of 0..(1<<nbits)-1 */
-/* permute(0)=0. This is intended and useful. */
-static ub4 permute(x, nbits)
-ub4 x; /* input, a value in some range */
-ub4 nbits; /* input, number of bits in range */
-{
- int i;
- int mask = ((ub4)1<<nbits)-1; /* all ones */
- int const2 = 1+nbits/2;
- int const3 = 1+nbits/3;
- int const4 = 1+nbits/4;
- int const5 = 1+nbits/5;
- for (i=0; i<20; ++i)
- {
- x = (x+(x<<const2)) & mask;
- x = (x^(x>>const3));
- x = (x+(x<<const4)) & mask;
- x = (x^(x>>const5));
- }
- return x;
-}
-
-/* initialize scramble[] with distinct random values in 0..smax-1 */
-static void scrambleinit(scramble, smax)
-ub4 *scramble; /* hash is a^scramble[tab[b]] */
-ub4 smax; /* scramble values should be in 0..smax-1 */
-{
- ub4 i;
-
- /* fill scramble[] with distinct random integers in 0..smax-1 */
- for (i=0; i<SCRAMBLE_LEN; ++i)
- {
- scramble[i] = permute(i, mylog2(smax));
- }
-}
-
-/*
- * Check if key1 and key2 are the same.
- * We already checked (a,b) are the same.
- */
-static void checkdup(key1, key2, form)
-key *key1;
-key *key2;
-hashform *form;
-{
- switch(form->hashtype)
- {
- case STRING_HT:
- if ((key1->len_k == key2->len_k) &&
- !memcmp(key1->name_k, key2->name_k, (size_t)key1->len_k))
- {
- fprintf(stderr, "perfect.c: Duplicates keys! %.*s\n",
- key1->len_k, key1->name_k);
- exit(SUCCESS);
- }
- break;
- case INT_HT:
- if (key1->hash_k == key2->hash_k)
- {
- fprintf(stderr, "perfect.c: Duplicate keys! %.8lx\n", key1->hash_k);
- exit(SUCCESS);
- }
- break;
- case AB_HT:
- fprintf(stderr, "perfect.c: Duplicate keys! %.8lx %.8lx\n",
- key1->a_k, key1->b_k);
- exit(SUCCESS);
- break;
- default:
- fprintf(stderr, "perfect.c: Illegal hash type %ld\n", (ub4)form->hashtype);
- exit(SUCCESS);
- break;
- }
-}
-
-
-/*
- * put keys in tabb according to key->b_k
- * check if the initial hash might work
- */
-static int inittab(tabb, blen, keys, form, complete)
-bstuff *tabb; /* output, list of keys with b for (a,b) */
-ub4 blen; /* length of tabb */
-key *keys; /* list of keys already hashed */
-hashform *form; /* user directives */
-int complete; /* TRUE means to complete init despite collisions */
-{
- int nocollision = TRUE;
- key *mykey;
-
- memset((void *)tabb, 0, (size_t)(sizeof(bstuff)*blen));
-
- /* Two keys with the same (a,b) guarantees a collision */
- for (mykey=keys; mykey; mykey=mykey->next_k)
- {
- key *otherkey;
-
- for (otherkey=tabb[mykey->b_k].list_b;
- otherkey;
- otherkey=otherkey->nextb_k)
- {
- if (mykey->a_k == otherkey->a_k)
- {
- nocollision = FALSE;
- checkdup(mykey, otherkey, form);
- if (!complete)
- return FALSE;
- }
- }
- ++tabb[mykey->b_k].listlen_b;
- mykey->nextb_k = tabb[mykey->b_k].list_b;
- tabb[mykey->b_k].list_b = mykey;
- }
-
- /* no two keys have the same (a,b) pair */
- return nocollision;
-}
-
-
-/* Do the initial hash for normal mode (use lookup and checksum) */
-static void initnorm(keys, alen, blen, smax, salt, final)
-key *keys; /* list of all keys */
-ub4 alen; /* (a,b) has a in 0..alen-1, a power of 2 */
-ub4 blen; /* (a,b) has b in 0..blen-1, a power of 2 */
-ub4 smax; /* maximum range of computable hash values */
-ub4 salt; /* used to initialize the hash function */
-gencode *final; /* output, code for the final hash */
-{
- key *mykey;
- if (mylog2(alen)+mylog2(blen) > UB4BITS)
- {
- ub4 initlev = salt*0x9e3779b9; /* the golden ratio; an arbitrary value */
-
- for (mykey=keys; mykey; mykey=mykey->next_k)
- {
- ub4 i, state[CHECKSTATE];
- for (i=0; i<CHECKSTATE; ++i) state[i] = initlev;
- checksum( mykey->name_k, mykey->len_k, state);
- mykey->a_k = state[0]&(alen-1);
- mykey->b_k = state[1]&(blen-1);
- }
- final->used = 4;
- sprintf(final->line[0],
- " ub4 i,state[CHECKSTATE],rsl;\n");
- sprintf(final->line[1],
- " for (i=0; i<CHECKSTATE; ++i) state[i]=0x%lx;\n",initlev);
- sprintf(final->line[2],
- " checksum(key, len, state);\n");
- sprintf(final->line[3],
- " rsl = ((state[0]&0x%x)^scramble[tab[state[1]&0x%x]]);\n",
- alen-1, blen-1);
- }
- else
- {
- ub4 loga = mylog2(alen); /* log based 2 of blen */
- ub4 initlev = salt*0x9e3779b9; /* the golden ratio; an arbitrary value */
-
- for (mykey=keys; mykey; mykey=mykey->next_k)
- {
- ub4 hash = lookup(mykey->name_k, mykey->len_k, initlev);
- mykey->a_k = (loga > 0) ? hash>>(UB4BITS-loga) : 0;
- mykey->b_k = (blen > 1) ? hash&(blen-1) : 0;
- }
- final->used = 2;
- sprintf(final->line[0],
- " ub4 rsl, val = lookup(key, len, 0x%lx);\n", initlev);
- if (smax <= 1)
- {
- sprintf(final->line[1], " rsl = 0;\n");
- }
- else if (mylog2(alen) == 0)
- {
- sprintf(final->line[1], " rsl = tab[val&0x%x];\n", blen-1);
- }
- else if (blen < USE_SCRAMBLE)
- {
- sprintf(final->line[1], " rsl = ((val>>%ld)^tab[val&0x%x]);\n",
- UB4BITS-mylog2(alen), blen-1);
- }
- else
- {
- sprintf(final->line[1], " rsl = ((val>>%ld)^scramble[tab[val&0x%x]]);\n",
- UB4BITS-mylog2(alen), blen-1);
- }
- }
-}
-
-
-
-/* Do initial hash for inline mode */
-static void initinl(keys, alen, blen, smax, salt, final)
-key *keys; /* list of all keys */
-ub4 alen; /* (a,b) has a in 0..alen-1, a power of 2 */
-ub4 blen; /* (a,b) has b in 0..blen-1, a power of 2 */
-ub4 smax; /* range of computable hash values */
-ub4 salt; /* used to initialize the hash function */
-gencode *final; /* generated code for final hash */
-{
- key *mykey;
- ub4 amask = alen-1;
- ub4 blog = mylog2(blen);
- ub4 initval = salt*0x9e3779b9; /* the golden ratio; an arbitrary value */
-
- /* It's more important to have b uniform than a, so b is the low bits */
- for (mykey = keys; mykey != (key *)0; mykey = mykey->next_k)
- {
- ub4 hash = initval;
- ub4 i;
- for (i=0; i<mykey->len_k; ++i)
- {
- hash = (mykey->name_k[i] ^ hash) + ((hash<<(UB4BITS-6))+(hash>>6));
- }
- mykey->hash_k = hash;
- mykey->a_k = (alen > 1) ? (hash & amask) : 0;
- mykey->b_k = (blen > 1) ? (hash >> (UB4BITS-blog)) : 0;
- }
- final->used = 1;
- if (smax <= 1)
- {
- sprintf(final->line[0], " ub4 rsl = 0;\n");
- }
- else if (blen < USE_SCRAMBLE)
- {
- sprintf(final->line[0], " ub4 rsl = ((val & 0x%lx) ^ tab[val >> %ld]);\n",
- amask, UB4BITS-blog);
- }
- else
- {
- sprintf(final->line[0], " ub4 rsl = ((val & 0x%lx) ^ scramble[tab[val >> %ld]]);\n",
- amask, UB4BITS-blog);
- }
-}
-
-
-/*
- * Run a hash function on the key to get a and b
- * Returns:
- * 0: didn't find distinct (a,b) for all keys
- * 1: found distinct (a,b) for all keys, put keys in tabb[]
- * 2: found a perfect hash, no need to do any more work
- */
-static ub4 initkey(keys, nkeys, tabb, alen, blen, smax, salt, form, final)
-key *keys; /* list of all keys */
-ub4 nkeys; /* total number of keys */
-bstuff *tabb; /* stuff indexed by b */
-ub4 alen; /* (a,b) has a in 0..alen-1, a power of 2 */
-ub4 blen; /* (a,b) has b in 0..blen-1, a power of 2 */
-ub4 smax; /* range of computable hash values */
-ub4 salt; /* used to initialize the hash function */
-hashform *form; /* user directives */
-gencode *final; /* code for final hash */
-{
- ub4 finished;
-
- /* Do the initial hash of the keys */
- switch(form->mode)
- {
- case NORMAL_HM:
- initnorm(keys, alen, blen, smax, salt, final);
- break;
- case INLINE_HM:
- initinl(keys, alen, blen, smax, salt, final);
- break;
- case HEX_HM:
- case DECIMAL_HM:
- finished = inithex(keys, nkeys, alen, blen, smax, salt, final, form);
- if (finished) return 2;
- break;
- default:
- fprintf(stderr, "fatal error: illegal mode\n");
- exit(1);
- }
-
- if (nkeys <= 1)
- {
- final->used = 1;
- sprintf(final->line[0], " ub4 rsl = 0;\n");
- return 2;
- }
-
- return inittab(tabb, blen, keys, form, FALSE);
-}
-
-/* Print an error message and exit if there are duplicates */
-static void duplicates(tabb, blen, keys, form)
-bstuff *tabb; /* array of lists of keys with the same b */
-ub4 blen; /* length of tabb, a power of 2 */
-key *keys;
-hashform *form; /* user directives */
-{
- ub4 i;
- key *key1;
- key *key2;
-
- (void)inittab(tabb, blen, keys, form, TRUE);
-
- /* for each b, do nested loops through key list looking for duplicates */
- for (i=0; i<blen; ++i)
- for (key1=tabb[i].list_b; key1; key1=key1->nextb_k)
- for (key2=key1->nextb_k; key2; key2=key2->nextb_k)
- checkdup(key1, key2, form);
-}
-
-
-/* Try to apply an augmenting list */
-static int apply(tabb, tabh, tabq, blen, scramble, tail, rollback)
-bstuff *tabb;
-hstuff *tabh;
-qstuff *tabq;
-ub4 blen;
-ub4 *scramble;
-ub4 tail;
-int rollback; /* FALSE applies augmenting path, TRUE rolls back */
-{
- ub4 hash;
- key *mykey;
- bstuff *pb;
- ub4 child;
- ub4 parent;
- ub4 stabb; /* scramble[tab[b]] */
-
- /* walk from child to parent */
- for (child=tail-1; child; child=parent)
- {
- parent = tabq[child].parent_q; /* find child's parent */
- pb = tabq[parent].b_q; /* find parent's list of siblings */
-
- /* erase old hash values */
- stabb = scramble[pb->val_b];
- for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
- {
- hash = mykey->a_k^stabb;
- if (mykey == tabh[hash].key_h)
- { /* erase hash for all of child's siblings */
- tabh[hash].key_h = (key *)0;
- }
- }
-
- /* change pb->val_b, which will change the hashes of all parent siblings */
- pb->val_b = (rollback ? tabq[child].oldval_q : tabq[child].newval_q);
-
- /* set new hash values */
- stabb = scramble[pb->val_b];
- for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
- {
- hash = mykey->a_k^stabb;
- if (rollback)
- {
- if (parent == 0) continue; /* root never had a hash */
- }
- else if (tabh[hash].key_h)
- {
- /* very rare: roll back any changes */
- (void *)apply(tabb, tabh, tabq, blen, scramble, tail, TRUE);
- return FALSE; /* failure, collision */
- }
- tabh[hash].key_h = mykey;
- }
- }
- return TRUE;
-}
-
-
-/*
--------------------------------------------------------------------------------
-augment(): Add item to the mapping.
-
-Construct a spanning tree of *b*s with *item* as root, where each
-parent can have all its hashes changed (by some new val_b) with
-at most one collision, and each child is the b of that collision.
-
-I got this from Tarjan's "Data Structures and Network Algorithms". The
-path from *item* to a *b* that can be remapped with no collision is
-an "augmenting path". Change values of tab[b] along the path so that
-the unmapped key gets mapped and the unused hash value gets used.
-
-Assuming 1 key per b, if m out of n hash values are still unused,
-you should expect the transitive closure to cover n/m nodes before
-an unused node is found. Sum(i=1..n)(n/i) is about nlogn, so expect
-this approach to take about nlogn time to map all single-key b's.
--------------------------------------------------------------------------------
-*/
-static int augment(tabb, tabh, tabq, blen, scramble, smax, item, nkeys,
- highwater, form)
-bstuff *tabb; /* stuff indexed by b */
-hstuff *tabh; /* which key is associated with which hash, indexed by hash */
-qstuff *tabq; /* queue of *b* values, this is the spanning tree */
-ub4 blen; /* length of tabb */
-ub4 *scramble; /* final hash is a^scramble[tab[b]] */
-ub4 smax; /* highest value in scramble */
-bstuff *item; /* &tabb[b] for the b to be mapped */
-ub4 nkeys; /* final hash must be in 0..nkeys-1 */
-ub4 highwater; /* a value higher than any now in tabb[].water_b */
-hashform *form; /* TRUE if we should do a minimal perfect hash */
-{
- ub4 q; /* current position walking through the queue */
- ub4 tail; /* tail of the queue. 0 is the head of the queue. */
- ub4 limit=((blen < USE_SCRAMBLE) ? smax : UB1MAXVAL+1);
- ub4 highhash = ((form->perfect == MINIMAL_HP) ? nkeys : smax);
- int trans = (form->speed == SLOW_HS || form->perfect == MINIMAL_HP);
-
- /* initialize the root of the spanning tree */
- tabq[0].b_q = item;
- tail = 1;
-
- /* construct the spanning tree by walking the queue, add children to tail */
- for (q=0; q<tail; ++q)
- {
- bstuff *myb = tabq[q].b_q; /* the b for this node */
- ub4 i; /* possible value for myb->val_b */
-
- if (!trans && (q == 1))
- break; /* don't do transitive closure */
-
- for (i=0; i<limit; ++i)
- {
- bstuff *childb = (bstuff *)0; /* the b that this i maps to */
- key *mykey; /* for walking through myb's keys */
-
- for (mykey = myb->list_b; mykey; mykey=mykey->nextb_k)
- {
- key *childkey;
- ub4 hash = mykey->a_k^scramble[i];
-
- if (hash >= highhash) break; /* out of bounds */
- childkey = tabh[hash].key_h;
-
- if (childkey)
- {
- bstuff *hitb = &tabb[childkey->b_k];
-
- if (childb)
- {
- if (childb != hitb) break; /* hit at most one child b */
- }
- else
- {
- childb = hitb; /* remember this as childb */
- if (childb->water_b == highwater) break; /* already explored */
- }
- }
- }
- if (mykey) continue; /* myb with i has multiple collisions */
-
- /* add childb to the queue of reachable things */
- if (childb) childb->water_b = highwater;
- tabq[tail].b_q = childb;
- tabq[tail].newval_q = i; /* how to make parent (myb) use this hash */
- tabq[tail].oldval_q = myb->val_b; /* need this for rollback */
- tabq[tail].parent_q = q;
- ++tail;
-
- if (!childb)
- { /* found an *i* with no collisions? */
- /* try to apply the augmenting path */
- if (apply(tabb, tabh, tabq, blen, scramble, tail, FALSE))
- return TRUE; /* success, item was added to the perfect hash */
-
- --tail; /* don't know how to handle such a child! */
- }
- }
- }
- return FALSE;
-}
-
-
-/* find a mapping that makes this a perfect hash */
-static int perfect(tabb, tabh, tabq, blen, smax, scramble, nkeys, form)
-bstuff *tabb;
-hstuff *tabh;
-qstuff *tabq;
-ub4 blen;
-ub4 smax;
-ub4 *scramble;
-ub4 nkeys;
-hashform *form;
-{
- ub4 maxkeys; /* maximum number of keys for any b */
- ub4 i, j;
-
- /* clear any state from previous attempts */
- memset((void *)tabh, 0,
- (size_t)(sizeof(hstuff)*
- ((form->perfect == MINIMAL_HP) ? nkeys : smax)));
- memset((void *)tabq, 0, (size_t)(sizeof(qstuff)*(blen+1)));
-
- for (maxkeys=0,i=0; i<blen; ++i)
- if (tabb[i].listlen_b > maxkeys)
- maxkeys = tabb[i].listlen_b;
-
- /* In descending order by number of keys, map all *b*s */
- for (j=maxkeys; j>0; --j)
- for (i=0; i<blen; ++i)
- if (tabb[i].listlen_b == j)
- if (!augment(tabb, tabh, tabq, blen, scramble, smax, &tabb[i], nkeys,
- i+1, form))
- {
- printf("fail to map group of size %ld for tab size %ld\n", j, blen);
- return FALSE;
- }
-
- /* Success! We found a perfect hash of all keys into 0..nkeys-1. */
- return TRUE;
-}
-
-
-/*
- * Simple case: user gave (a,b). No more mixing, no guessing alen or blen.
- * This assumes a,b reside in (key->a_k, key->b_k), and final->form == AB_HK.
- */
-static void hash_ab(tabb, alen, blen, salt, final,
- scramble, smax, keys, nkeys, form)
-bstuff **tabb; /* output, tab[] of the perfect hash, length *blen */
-ub4 *alen; /* output, 0..alen-1 is range for a of (a,b) */
-ub4 *blen; /* output, 0..blen-1 is range for b of (a,b) */
-ub4 *salt; /* output, initializes initial hash */
-gencode *final; /* code for final hash */
-ub4 *scramble; /* input, hash = a^scramble[tab[b]] */
-ub4 *smax; /* input, scramble[i] in 0..smax-1 */
-key *keys; /* input, keys to hash */
-ub4 nkeys; /* input, number of keys being hashed */
-hashform *form; /* user directives */
-{
- hstuff *tabh;
- qstuff *tabq;
- key *mykey;
- ub4 i;
- int used_tab;
-
- /* initially make smax the first power of two bigger than nkeys */
- *smax = ((ub4)1<<mylog2(nkeys));
- scrambleinit(scramble, *smax);
-
- /* set *alen and *blen based on max A and B from user */
- *alen = 1;
- *blen = 1;
- for (mykey = keys; mykey != (key *)0; mykey = mykey->next_k)
- {
- while (*alen <= mykey->a_k) *alen *= 2;
- while (*blen <= mykey->b_k) *blen *= 2;
- }
- if (*alen > 2**smax)
- {
- fprintf(stderr,
- "perfect.c: Can't deal with (A,B) having A bigger than twice \n");
- fprintf(stderr,
- " the smallest power of two greater or equal to any legal hash.\n");
- exit(SUCCESS);
- }
-
- /* allocate working memory */
- *tabb = (bstuff *)malloc((size_t)(sizeof(bstuff)*(*blen)));
- tabq = (qstuff *)remalloc(sizeof(qstuff)*(*blen+1), "perfect.c, tabq");
- tabh = (hstuff *)remalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ?
- nkeys : *smax),
- "perfect.c, tabh");
-
- /* check that (a,b) are distinct and put them in tabb indexed by b */
- (void)inittab(*tabb, *blen, keys, form, FALSE);
-
- /* try with smax */
- if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form))
- {
- if (form->perfect == MINIMAL_HP)
- {
- printf("fatal error: Cannot find perfect hash for user (A,B) pairs\n");
- exit(SUCCESS);
- }
- else
- {
- /* try with 2*smax */
- free((void *)tabh);
- *smax = *smax * 2;
- scrambleinit(scramble, *smax);
- tabh = (hstuff *)remalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ?
- nkeys : *smax),
- "perfect.c, tabh");
- if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form))
- {
- printf("fatal error: Cannot find perfect hash for user (A,B) pairs\n");
- exit(SUCCESS);
- }
- }
- }
-
- /* check if tab[] was really needed */
- for (i=0; i<*blen; ++i)
- {
- if ((*tabb)[i].val_b != 0) break; /* assumes permute(0) == 0 */
- }
- used_tab = (i < *blen);
-
- /* write the code for the perfect hash */
- *salt = 1;
- final->used = 1;
- if (!used_tab)
- {
- sprintf(final->line[0], " ub4 rsl = a;\n");
- }
- else if (*blen < USE_SCRAMBLE)
- {
- sprintf(final->line[0], " ub4 rsl = (a ^ tab[b]);\n");
- }
- else
- {
- sprintf(final->line[0], " ub4 rsl = (a ^ scramble[tab[b]]);\n");
- }
-
- printf("success, found a perfect hash\n");
-
- free((void *)tabq);
- free((void *)tabh);
-}
-
-
-/* guess initial values for alen and blen */
-static void initalen(alen, blen, smax, nkeys, form)
-ub4 *alen; /* output, initial alen */
-ub4 *blen; /* output, initial blen */
-ub4 *smax; /* input, power of two greater or equal to max hash value */
-ub4 nkeys; /* number of keys being hashed */
-hashform *form; /* user directives */
-{
- /*
- * Find initial *alen, *blen
- * Initial alen and blen values were found empirically. Some factors:
- *
- * If smax<256 there is no scramble, so tab[b] needs to cover 0..smax-1.
- *
- * alen and blen must be powers of 2 because the values in 0..alen-1 and
- * 0..blen-1 are produced by applying a bitmask to the initial hash function.
- *
- * alen must be less than smax, in fact less than nkeys, because otherwise
- * there would often be no i such that a^scramble[i] is in 0..nkeys-1 for
- * all the *a*s associated with a given *b*, so there would be no legal
- * value to assign to tab[b]. This only matters when we're doing a minimal
- * perfect hash.
- *
- * It takes around 800 trials to find distinct (a,b) with nkey=smax*(5/8)
- * and alen*blen = smax*smax/32.
- *
- * Values of blen less than smax/4 never work, and smax/2 always works.
- *
- * We want blen as small as possible because it is the number of bytes in
- * the huge array we must create for the perfect hash.
- *
- * When nkey <= smax*(5/8), blen=smax/4 works much more often with
- * alen=smax/8 than with alen=smax/4. Above smax*(5/8), blen=smax/4
- * doesn't seem to care whether alen=smax/8 or alen=smax/4. I think it
- * has something to do with 5/8 = 1/8 * 5. For example examine 80000,
- * 85000, and 90000 keys with different values of alen. This only matters
- * if we're doing a minimal perfect hash.
- *
- * When alen*blen <= 1<<UB4BITS, the initial hash must produce one integer.
- * Bigger than that it must produce two integers, which increases the
- * cost of the hash per character hashed.
- */
- if (form->perfect == NORMAL_HP)
- {
- if ((form->speed == FAST_HS) && (nkeys > *smax*0.8))
- {
- *smax = *smax * 2;
- }
-
- *alen = ((form->hashtype==INT_HT) && *smax>131072) ?
- ((ub4)1<<(UB4BITS-mylog2(*blen))) : /* distinct keys => distinct (A,B) */
- *smax; /* no reason to restrict alen to smax/2 */
- if ((form->hashtype == INT_HT) && *smax < 32)
- *blen = *smax; /* go for function speed not space */
- else if (*smax/4 <= (1<<14))
- *blen = ((nkeys <= *smax*0.56) ? *smax/32 :
- (nkeys <= *smax*0.74) ? *smax/16 : *smax/8);
- else
- *blen = ((nkeys <= *smax*0.6) ? *smax/16 :
- (nkeys <= *smax*0.8) ? *smax/8 : *smax/4);
-
- if ((form->speed == FAST_HS) && (*blen < *smax/8))
- *blen = *smax/8;
-
- if (*alen < 1) *alen = 1;
- if (*blen < 1) *blen = 1;
- }
- else
- {
- switch(mylog2(*smax))
- {
- case 0:
- *alen = 1;
- *blen = 1;
- case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8:
- *alen = (form->perfect == NORMAL_HP) ? *smax : *smax/2;
- *blen = *smax/2;
- break;
- case 9:
- case 10:
- case 11:
- case 12:
- case 13:
- case 14:
- case 15:
- case 16:
- case 17:
- if (form->speed == FAST_HS)
- {
- *alen = *smax/2;
- *blen = *smax/4;
- }
- else if (*smax/4 < USE_SCRAMBLE)
- {
- *alen = ((nkeys <= *smax*0.52) ? *smax/8 : *smax/4);
- *blen = ((nkeys <= *smax*0.52) ? *smax/8 : *smax/4);
- }
- else
- {
- *alen = ((nkeys <= *smax*(5.0/8.0)) ? *smax/8 :
- (nkeys <= *smax*(3.0/4.0)) ? *smax/4 : *smax/2);
- *blen = *smax/4; /* always give the small size a shot */
- }
- break;
- case 18:
- if (form->speed == FAST_HS)
- {
- *alen = *smax/2;
- *blen = *smax/2;
- }
- else
- {
- *alen = *smax/8; /* never require the multiword hash */
- *blen = (nkeys <= *smax*(5.0/8.0)) ? *smax/4 : *smax/2;
- }
- break;
- case 19:
- case 20:
- *alen = (nkeys <= *smax*(5.0/8.0)) ? *smax/8 : *smax/2;
- *blen = (nkeys <= *smax*(5.0/8.0)) ? *smax/4 : *smax/2;
- break;
- default:
- *alen = *smax/2; /* just find a hash as quick as possible */
- *blen = *smax/2; /* we'll be thrashing virtual memory at this size */
- break;
- }
- }
-}
-
-/*
-** Try to find a perfect hash function.
-** Return the successful initializer for the initial hash.
-** Return 0 if no perfect hash could be found.
-*/
-void findhash(tabb, alen, blen, salt, final,
- scramble, smax, keys, nkeys, form)
-bstuff **tabb; /* output, tab[] of the perfect hash, length *blen */
-ub4 *alen; /* output, 0..alen-1 is range for a of (a,b) */
-ub4 *blen; /* output, 0..blen-1 is range for b of (a,b) */
-ub4 *salt; /* output, initializes initial hash */
-gencode *final; /* code for final hash */
-ub4 *scramble; /* input, hash = a^scramble[tab[b]] */
-ub4 *smax; /* input, scramble[i] in 0..smax-1 */
-key *keys; /* input, keys to hash */
-ub4 nkeys; /* input, number of keys being hashed */
-hashform *form; /* user directives */
-{
- ub4 bad_initkey; /* how many times did initkey fail? */
- ub4 bad_perfect; /* how many times did perfect fail? */
- ub4 trysalt; /* trial initializer for initial hash */
- ub4 maxalen;
- hstuff *tabh; /* table of keys indexed by hash value */
- qstuff *tabq; /* table of stuff indexed by queue value, used by augment */
-
- /* The case of (A,B) supplied by the user is a special case */
- if (form->hashtype == AB_HT)
- {
- hash_ab(tabb, alen, blen, salt, final,
- scramble, smax, keys, nkeys, form);
- return;
- }
-
- /* guess initial values for smax, alen and blen */
- *smax = ((ub4)1<<mylog2(nkeys));
- initalen(alen, blen, smax, nkeys, form);
-
- scrambleinit(scramble, *smax);
-
- maxalen = (form->perfect == MINIMAL_HP) ? *smax/2 : *smax;
-
- /* allocate working memory */
- *tabb = (bstuff *)remalloc((size_t)(sizeof(bstuff)*(*blen)),
- "perfect.c, tabb");
- tabq = (qstuff *)remalloc(sizeof(qstuff)*(*blen+1), "perfect.c, tabq");
- tabh = (hstuff *)remalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ?
- nkeys : *smax),
- "perfect.c, tabh");
-
- /* Actually find the perfect hash */
- *salt = 0;
- bad_initkey = 0;
- bad_perfect = 0;
- for (trysalt=1; ; ++trysalt)
- {
- ub4 rslinit;
- /* Try to find distinct (A,B) for all keys */
-
- rslinit = initkey(keys, nkeys, *tabb, *alen, *blen, *smax, trysalt,
- form, final);
-
- if (rslinit == 2)
- { /* initkey actually found a perfect hash, not just distinct (a,b) */
- *salt = 1;
- *blen = 0;
- break;
- }
- else if (rslinit == 0)
- {
- /* didn't find distinct (a,b) */
- if (++bad_initkey >= RETRY_INITKEY)
- {
- /* Try to put more bits in (A,B) to make distinct (A,B) more likely */
- if (*alen < maxalen)
- {
- *alen *= 2;
- }
- else if (*blen < *smax)
- {
- *blen *= 2;
- free(tabq);
- free(*tabb);
- *tabb = (bstuff *)malloc((size_t)(sizeof(bstuff)*(*blen)));
- tabq = (qstuff *)malloc((size_t)(sizeof(qstuff)*(*blen+1)));
- }
- else
- {
- duplicates(*tabb, *blen, keys, form); /* check for duplicates */
- printf("fatal error: Cannot perfect hash: cannot find distinct (A,B)\n");
- exit(SUCCESS);
- }
- bad_initkey = 0;
- bad_perfect = 0;
- }
- continue; /* two keys have same (a,b) pair */
- }
-
- printf("found distinct (A,B) on attempt %ld\n", trysalt);
-
- /* Given distinct (A,B) for all keys, build a perfect hash */
- if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form))
- {
- if ((form->hashtype != INT_HT && ++bad_perfect >= RETRY_PERFECT) ||
- (form->hashtype == INT_HT && ++bad_perfect >= RETRY_HEX))
- {
- if (*blen < *smax)
- {
- *blen *= 2;
- free(*tabb);
- free(tabq);
- *tabb = (bstuff *)malloc((size_t)(sizeof(bstuff)*(*blen)));
- tabq = (qstuff *)malloc((size_t)(sizeof(qstuff)*(*blen+1)));
- --trysalt; /* we know this salt got distinct (A,B) */
- }
- else
- {
- printf("fatal error: Cannot perfect hash: cannot build tab[]\n");
- exit(SUCCESS);
- }
- bad_perfect = 0;
- }
- continue;
- }
-
- *salt = trysalt;
- break;
- }
-
- printf("built perfect hash table of size %ld\n", *blen);
-
- /* free working memory */
- free((void *)tabh);
- free((void *)tabq);
-}
-
-/*
-------------------------------------------------------------------------------
-Input/output type routines
-------------------------------------------------------------------------------
-*/
-
-/* get the list of keys */
-static void getkeys(keys, nkeys, textroot, keyroot, form)
-key **keys; /* list of all keys */
-ub4 *nkeys; /* number of keys */
-reroot *textroot; /* get space to store key text */
-reroot *keyroot; /* get space for keys */
-hashform *form; /* user directives */
-{
- key *mykey;
- char *mytext;
- mytext = (char *)renew(textroot);
- *keys = 0;
- *nkeys = 0;
- while (fgets(mytext, MAXKEYLEN, stdin))
- {
- mykey = (key *)renew(keyroot);
- if (form->mode == AB_HM)
- {
- sscanf(mytext, "%lx %lx ", &mykey->a_k, &mykey->b_k);
- }
- else if (form->mode == ABDEC_HM)
- {
- sscanf(mytext, "%ld %ld ", &mykey->a_k, &mykey->b_k);
- }
- else if (form->mode == HEX_HM)
- {
- sscanf(mytext, "%lx ", &mykey->hash_k);
- }
- else if (form->mode == DECIMAL_HM)
- {
- sscanf(mytext, "%ld ", &mykey->hash_k);
- }
- else
- {
- mykey->name_k = (ub1 *)mytext;
- mytext = (char *)renew(textroot);
- mykey->len_k = (ub4)(strlen((char *)mykey->name_k)-1);
- }
- mykey->next_k = *keys;
- *keys = mykey;
- ++*nkeys;
- }
- redel(textroot, mytext);
-}
-
-/* make the .h file */
-static void make_h(blen, smax, nkeys, salt)
-ub4 blen;
-ub4 smax;
-ub4 nkeys;
-ub4 salt;
-{
- FILE *f;
- f = fopen("phash.h", "w");
- fprintf(f, "/* Perfect hash definitions */\n");
- fprintf(f, "#ifndef STANDARD\n");
- fprintf(f, "#include \"standard.h\"\n");
- fprintf(f, "#endif /* STANDARD */\n");
- fprintf(f, "#ifndef PHASH\n");
- fprintf(f, "#define PHASH\n");
- fprintf(f, "\n");
- if (blen > 0)
- {
- if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE)
- fprintf(f, "extern ub1 tab[];\n");
- else
- {
- fprintf(f, "extern ub2 tab[];\n");
- if (blen >= USE_SCRAMBLE)
- {
- if (smax <= UB2MAXVAL+1)
- fprintf(f, "extern ub2 scramble[];\n");
- else
- fprintf(f, "extern ub4 scramble[];\n");
- }
- }
- fprintf(f, "#define PHASHLEN 0x%lx /* length of hash mapping table */\n",
- blen);
- }
- fprintf(f, "#define PHASHNKEYS %ld /* How many keys were hashed */\n",
- nkeys);
- fprintf(f, "#define PHASHRANGE %ld /* Range any input might map to */\n",
- smax);
- fprintf(f, "#define PHASHSALT 0x%.8lx /* internal, initialize normal hash */\n",
- salt*0x9e3779b9);
- fprintf(f, "\n");
- fprintf(f, "ub4 phash();\n");
- fprintf(f, "\n");
- fprintf(f, "#endif /* PHASH */\n");
- fprintf(f, "\n");
- fclose(f);
-}
-
-/* make the .c file */
-static void make_c(tab, smax, blen, scramble, final, form)
-bstuff *tab; /* table indexed by b */
-ub4 smax; /* range of scramble[] */
-ub4 blen; /* b in 0..blen-1, power of 2 */
-ub4 *scramble; /* used in final hash */
-gencode *final; /* code for the final hash */
-hashform *form; /* user directives */
-{
- ub4 i;
- FILE *f;
- f = fopen("phash.c", "w");
- fprintf(f, "/* table for the mapping for the perfect hash */\n");
- fprintf(f, "#ifndef STANDARD\n");
- fprintf(f, "#include \"standard.h\"\n");
- fprintf(f, "#endif /* STANDARD */\n");
- fprintf(f, "#ifndef PHASH\n");
- fprintf(f, "#include \"phash.h\"\n");
- fprintf(f, "#endif /* PHASH */\n");
- fprintf(f, "#ifndef LOOKUPA\n");
- fprintf(f, "#include \"lookupa.h\"\n");
- fprintf(f, "#endif /* LOOKUPA */\n");
- fprintf(f, "\n");
- if (blen >= USE_SCRAMBLE)
- {
- fprintf(f, "/* A way to make the 1-byte values in tab bigger */\n");
- if (smax > UB2MAXVAL+1)
- {
- fprintf(f, "ub4 scramble[] = {\n");
- for (i=0; i<=UB1MAXVAL; i+=4)
- fprintf(f, "0x%.8lx, 0x%.8lx, 0x%.8lx, 0x%.8lx,\n",
- scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3]);
- }
- else
- {
- fprintf(f, "ub2 scramble[] = {\n");
- for (i=0; i<=UB1MAXVAL; i+=8)
- fprintf(f,
-"0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx,\n",
- scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3],
- scramble[i+4], scramble[i+5], scramble[i+6], scramble[i+7]);
- }
- fprintf(f, "};\n");
- fprintf(f, "\n");
- }
- if (blen > 0)
- {
- fprintf(f, "/* small adjustments to _a_ to make values distinct */\n");
-
- if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE)
- fprintf(f, "ub1 tab[] = {\n");
- else
- fprintf(f, "ub2 tab[] = {\n");
-
- if (blen < 16)
- {
- for (i=0; i<blen; ++i) fprintf(f, "%3d,", scramble[tab[i].val_b]);
- }
- else if (blen <= 1024)
- {
- for (i=0; i<blen; i+=16)
- fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
- scramble[tab[i+0].val_b], scramble[tab[i+1].val_b],
- scramble[tab[i+2].val_b], scramble[tab[i+3].val_b],
- scramble[tab[i+4].val_b], scramble[tab[i+5].val_b],
- scramble[tab[i+6].val_b], scramble[tab[i+7].val_b],
- scramble[tab[i+8].val_b], scramble[tab[i+9].val_b],
- scramble[tab[i+10].val_b], scramble[tab[i+11].val_b],
- scramble[tab[i+12].val_b], scramble[tab[i+13].val_b],
- scramble[tab[i+14].val_b], scramble[tab[i+15].val_b]);
- }
- else if (blen < USE_SCRAMBLE)
- {
- for (i=0; i<blen; i+=8)
- fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
- scramble[tab[i+0].val_b], scramble[tab[i+1].val_b],
- scramble[tab[i+2].val_b], scramble[tab[i+3].val_b],
- scramble[tab[i+4].val_b], scramble[tab[i+5].val_b],
- scramble[tab[i+6].val_b], scramble[tab[i+7].val_b]);
- }
- else
- {
- for (i=0; i<blen; i+=16)
- fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
- tab[i+0].val_b, tab[i+1].val_b,
- tab[i+2].val_b, tab[i+3].val_b,
- tab[i+4].val_b, tab[i+5].val_b,
- tab[i+6].val_b, tab[i+7].val_b,
- tab[i+8].val_b, tab[i+9].val_b,
- tab[i+10].val_b, tab[i+11].val_b,
- tab[i+12].val_b, tab[i+13].val_b,
- tab[i+14].val_b, tab[i+15].val_b);
- }
- fprintf(f, "};\n");
- fprintf(f, "\n");
- }
- fprintf(f, "/* The hash function */\n");
- switch(form->mode)
- {
- case NORMAL_HM:
- fprintf(f, "ub4 phash(key, len)\n");
- fprintf(f, "char *key;\n");
- fprintf(f, "int len;\n");
- break;
- case INLINE_HM:
- case HEX_HM:
- case DECIMAL_HM:
- fprintf(f, "ub4 phash(val)\n");
- fprintf(f, "ub4 val;\n");
- break;
- case AB_HM:
- case ABDEC_HM:
- fprintf(f, "ub4 phash(a,b)\n");
- fprintf(f, "ub4 a;\n");
- fprintf(f, "ub4 b;\n");
- break;
- }
- fprintf(f, "{\n");
- for (i=0; i<final->used; ++i)
- fprintf(f, final->line[i]);
- fprintf(f, " return rsl;\n");
- fprintf(f, "}\n");
- fprintf(f, "\n");
- fclose(f);
-}
-
-/*
-------------------------------------------------------------------------------
-Read in the keys, find the hash, and write the .c and .h files
-------------------------------------------------------------------------------
-*/
-static void driver(form)
-hashform *form; /* user directives */
-{
- ub4 nkeys; /* number of keys */
- key *keys; /* head of list of keys */
- bstuff *tab; /* table indexed by b */
- ub4 smax; /* scramble[] values in 0..smax-1, a power of 2 */
- ub4 alen; /* a in 0..alen-1, a power of 2 */
- ub4 blen; /* b in 0..blen-1, a power of 2 */
- ub4 salt; /* a parameter to the hash function */
- reroot *textroot; /* MAXKEYLEN-character text lines */
- reroot *keyroot; /* source of keys */
- gencode final; /* code for final hash */
- ub4 i;
- ub4 scramble[SCRAMBLE_LEN]; /* used in final hash function */
- char buf[10][80]; /* buffer for generated code */
- char *buf2[10]; /* also for generated code */
-
- /* set up memory sources */
- textroot = remkroot((size_t)MAXKEYLEN);
- keyroot = remkroot(sizeof(key));
-
- /* set up code for final hash */
- final.line = buf2;
- final.used = 0;
- final.len = 10;
- for (i=0; i<10; ++i) final.line[i] = buf[i];
-
- /* read in the list of keywords */
- getkeys(&keys, &nkeys, textroot, keyroot, form);
- printf("Read in %ld keys\n",nkeys);
-
- /* find the hash */
- findhash(&tab, &alen, &blen, &salt, &final,
- scramble, &smax, keys, nkeys, form);
-
- /* generate the phash.h file */
- make_h(blen, smax, nkeys, salt);
- printf("Wrote phash.h\n");
-
- /* generate the phash.c file */
- make_c(tab, smax, blen, scramble, &final, form);
- printf("Wrote phash.c\n");
-
- /* clean up memory sources */
- refree(textroot);
- refree(keyroot);
- free((void *)tab);
- printf("Cleaned up\n");
-}
-
-
-/* Describe how to use this utility */
-static void usage_error()
-{
- printf("Usage: perfect [-{NnIiHhDdAaBb}{MmPp}{FfSs}] < key.txt \n");
- printf("The input is a list of keys, one key per line.\n");
- printf("Only one of NnIiHhDdAa and one of MmPp may be specified.\n");
- printf(" N,n: normal mode, key is any string string (default).\n");
- printf(" I,i: initial hash for ASCII char strings.\n");
- printf("The initial hash must be\n");
- printf(" hash = PHASHSALT;\n");
- printf(" for (i=0; i<keylength; ++i) {\n");
- printf(" hash = (hash ^ key[i]) + ((hash<<26)+(hash>>6));\n");
- printf(" }\n");
- printf("Note that this can be inlined in any user loop that walks\n");
- printf("through the key anyways, eliminating the loop overhead.\n");
- printf(" H,h: Keys are 4-byte integers in hex in this format:\n");
- printf("ffffffff\n");
- printf("This is good for optimizing switch statement compilation.\n");
- printf(" D,d: Same as H,h, except in decimal not hexidecimal\n");
- printf(" A,a: An (A,B) pair is supplied in hex in this format:\n");
- printf("aaa bbb\n");
- printf(" B,b: Same as A,a, except in decimal not hexidecimal\n");
- printf("This mode does nothing but find the values of tab[].\n");
- printf("*A* must be less than the total number of keys.\n");
- printf(" M,m: Minimal perfect hash. Hash will be in 0..nkeys-1 (default)\n");
- printf(" P,p: Perfect hash. Hash will be in 0..n-1, where n >= nkeys\n");
- printf("and n is a power of 2. Will probably use a smaller tab[].");
- printf(" F,f: Fast mode. Generate the perfect hash fast.\n");
- printf(" S,s: Slow mode. Spend time finding a good perfect hash.\n");
-
- exit(SUCCESS);
-}
-
-
-/* Interpret arguments and call the driver */
-/* See usage_error for the expected arguments */
-int main(argc, argv)
-int argc;
-char **argv;
-{
- int mode_given = FALSE;
- int minimal_given = FALSE;
- int speed_given = FALSE;
- hashform form;
- char *c;
-
- /* default behavior */
- form.mode = NORMAL_HM;
- form.hashtype = STRING_HT;
- form.perfect = MINIMAL_HP;
- form.speed = SLOW_HS;
-
- /* let the user override the default behavior */
- switch (argc)
- {
- case 1:
- break;
- case 2:
- if (argv[1][0] != '-')
- {
- usage_error();
- break;
- }
- for (c = &argv[1][1]; *c != '\0'; ++c) switch(*c)
- {
- case 'n': case 'N':
- case 'i': case 'I':
- case 'h': case 'H':
- case 'd': case 'D':
- case 'a': case 'A':
- case 'b': case 'B':
- if (mode_given == TRUE)
- usage_error();
- switch(*c)
- {
- case 'n': case 'N':
- form.mode = NORMAL_HM; form.hashtype = STRING_HT; break;
- case 'i': case 'I':
- form.mode = INLINE_HM; form.hashtype = STRING_HT; break;
- case 'h': case 'H':
- form.mode = HEX_HM; form.hashtype = INT_HT; break;
- case 'd': case 'D':
- form.mode = DECIMAL_HM; form.hashtype = INT_HT; break;
- case 'a': case 'A':
- form.mode = AB_HM; form.hashtype = AB_HT; break;
- case 'b': case 'B':
- form.mode = ABDEC_HM; form.hashtype = AB_HT; break;
- }
- mode_given = TRUE;
- break;
- case 'm': case 'M':
- case 'p': case 'P':
- if (minimal_given == TRUE)
- usage_error();
- switch(*c)
- {
- case 'p': case 'P':
- form.perfect = NORMAL_HP; break;
- case 'm': case 'M':
- form.perfect = MINIMAL_HP; break;
- }
- minimal_given = TRUE;
- break;
- case 'f': case 'F':
- case 's': case 'S':
- if (speed_given == TRUE)
- usage_error();
- switch(*c)
- {
- case 'f': case 'F':
- form.speed = FAST_HS; break;
- case 's': case 'S':
- form.speed = SLOW_HS; break;
- }
- speed_given = TRUE;
- break;
- default:
- usage_error();
- }
- break;
- default:
- usage_error();
- }
-
- /* Generate the [minimal] perfect hash */
- driver(&form);
-
- return SUCCESS;
-}
diff --git a/tools/codegen/core/perfect/perfect.h b/tools/codegen/core/perfect/perfect.h
deleted file mode 100644
index fed5296bb7..0000000000
--- a/tools/codegen/core/perfect/perfect.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
-------------------------------------------------------------------------------
-perfect.h: code to generate code for a hash for perfect hashing.
-(c) Bob Jenkins, September 1996
-You may use this code in any way you wish, and it is free. No warranty.
-I hereby place this in the public domain.
-Source is http://burtleburtle.net/bob/c/perfect.h
-------------------------------------------------------------------------------
-*/
-
-#ifndef STANDARD
-#include "standard.h"
-#endif
-
-#ifndef PERFECT
-#define PERFECT
-
-#define MAXKEYLEN 30 /* maximum length of a key */
-#define USE_SCRAMBLE 4096 /* use scramble if blen >= USE_SCRAMBLE */
-#define SCRAMBLE_LEN ((ub4)1<<16) /* length of *scramble* */
-#define RETRY_INITKEY 2048 /* number of times to try to find distinct (a,b) */
-#define RETRY_PERFECT 1 /* number of times to try to make a perfect hash */
-#define RETRY_HEX 200 /* RETRY_PERFECT when hex keys given */
-
-/* the generated code for the final hash, assumes initial hash is done */
-struct gencode
-{
- char **line; /* array of text lines, 80 bytes apiece */
- /*
- * The code placed here must declare "ub4 rsl"
- * and assign it the value of the perfect hash using the function inputs.
- * Later code will be tacked on which returns rsl or manipulates it according
- * to the user directives.
- *
- * This code is at the top of the routine; it may and must declare any
- * local variables it needs.
- *
- * Each way of filling in **line should be given a comment that is a unique
- * tag. A testcase named with that tag should also be found which tests
- * the generated code.
- */
- ub4 len; /* number of lines available for final hash */
- ub4 used; /* number of lines used by final hash */
-
- ub4 lowbit; /* for HEX, lowest interesting bit */
- ub4 highbit; /* for HEX, highest interesting bit */
- ub4 diffbits; /* bits which differ for some key */
- ub4 i,j,k,l,m,n,o; /* state machine used in hexn() */
-};
-typedef struct gencode gencode;
-
-/* user directives: perfect hash? minimal perfect hash? input is an int? */
-struct hashform
-{
- enum {
- NORMAL_HM, /* key is a string */
- INLINE_HM, /* user will do initial hash, we must choose salt for them */
- HEX_HM, /* key to be hashed is a hexidecimal 4-byte integer */
- DECIMAL_HM, /* key to be hashed is a decimal 4-byte integer */
- AB_HM, /* key to be hashed is "A B", where A and B are (A,B) in hex */
- ABDEC_HM /* like AB_HM, but in decimal */
- } mode;
- enum {
- STRING_HT, /* key is a string */
- INT_HT, /* key is an integer */
- AB_HT /* dunno what key is, but input is distinct (A,B) pair */
- } hashtype;
- enum {
- NORMAL_HP, /* just find a perfect hash */
- MINIMAL_HP /* find a minimal perfect hash */
- } perfect;
- enum {
- FAST_HS, /* fast mode */
- SLOW_HS /* slow mode */
- } speed;
-};
-typedef struct hashform hashform;
-
-/* representation of a key */
-struct key
-{
- ub1 *name_k; /* the actual key */
- ub4 len_k; /* the length of the actual key */
- ub4 hash_k; /* the initial hash value for this key */
- struct key *next_k; /* next key */
-/* beyond this point is mapping-dependent */
- ub4 a_k; /* a, of the key maps to (a,b) */
- ub4 b_k; /* b, of the key maps to (a,b) */
- struct key *nextb_k; /* next key with this b */
-};
-typedef struct key key;
-
-/* things indexed by b of original (a,b) pair */
-struct bstuff
-{
- ub2 val_b; /* hash=a^tabb[b].val_b */
- key *list_b; /* tabb[i].list_b is list of keys with b==i */
- ub4 listlen_b; /* length of list_b */
- ub4 water_b; /* high watermark of who has visited this map node */
-};
-typedef struct bstuff bstuff;
-
-/* things indexed by final hash value */
-struct hstuff
-{
- key *key_h; /* tabh[i].key_h is the key with a hash of i */
-};
-typedef struct hstuff hstuff;
-
-/* things indexed by queue position */
-struct qstuff
-{
- bstuff *b_q; /* b that currently occupies this hash */
- ub4 parent_q; /* queue position of parent that could use this hash */
- ub2 newval_q; /* what to change parent tab[b] to to use this hash */
- ub2 oldval_q; /* original value of tab[b] */
-};
-typedef struct qstuff qstuff;
-
-/* return ceiling(log based 2 of x) */
-ub4 mylog2(/*_ ub4 x _*/);
-
-/* Given the keys, scramble[], and hash mode, find the perfect hash */
-void findhash(/*_ bstuff **tabb, ub4 *alen, ub4 *blen, ub4 *salt,
- gencode *final, ub4 *scramble, ub4 smax, key *keys, ub4 nkeys,
- hashform *form _*/);
-
-/* private, but in a different file because it's excessively verbose */
-int inithex(/*_ key *keys, ub4 *alen, ub4 *blen, ub4 smax, ub4 nkeys,
- ub4 salt, gencode *final, gencode *form _*/);
-
-#endif /* PERFECT */
diff --git a/tools/codegen/core/perfect/perfhex.c b/tools/codegen/core/perfect/perfhex.c
deleted file mode 100644
index 9c28dc734b..0000000000
--- a/tools/codegen/core/perfect/perfhex.c
+++ /dev/null
@@ -1,1308 +0,0 @@
-/*
-------------------------------------------------------------------------------
-perfhex.c: code to generate code for a hash for perfect hashing.
-(c) Bob Jenkins, December 31 1999
-You may use this code in any way you wish, and it is free. No warranty.
-I hereby place this in the public domain.
-Source is http://burtleburtle.net/bob/c/perfhex.c
-
-The task of this file is to do the minimal amount of mixing needed to
-find distinct (a,b) for each key when each key is a distinct ub4. That
-means trying all possible ways to mix starting with the fastest. The
-output is those (a,b) pairs and code in the *final* structure for producing
-those pairs.
-------------------------------------------------------------------------------
-*/
-
-#ifndef STANDARD
-#include "standard.h"
-#endif
-#ifndef LOOKUPA
-#include "lookupa.h"
-#endif
-#ifndef RECYCLE
-#include "recycle.h"
-#endif
-#ifndef PERFECT
-#include "perfect.h"
-#endif
-
-/*
- * Find a perfect hash when there is only one key. Zero instructions.
- * Hint: the one key always hashes to 0
- */
-static void hexone(keys, final)
-key *keys;
-gencode *final;
-{
- /* 1 key: the hash is always 0 */
- keys->a_k = 0;
- keys->b_k = 0;
- final->used = 1;
- sprintf(final->line[0], " ub4 rsl = 0;\n"); /* h1a: 37 */
-}
-
-
-
-/*
- * Find a perfect hash when there are only two keys. Max 2 instructions.
- * There exists a bit that is different for the two keys. Test it.
- * Note that a perfect hash of 2 keys is automatically minimal.
- */
-static void hextwo(keys, final)
-key *keys;
-gencode *final;
-{
- ub4 a = keys->hash_k;
- ub4 b = keys->next_k->hash_k;
- ub4 i;
-
- if (a == b)
- {
- printf("fatal error: duplicate keys\n");
- exit(SUCCESS);
- }
-
- final->used = 1;
-
- /* one instruction */
- if ((a&1) != (b&1))
- {
- sprintf(final->line[0], " ub4 rsl = (val & 1);\n"); /* h2a: 3,4 */
- return;
- }
-
- /* two instructions */
- for (i=0; i<UB4BITS; ++i)
- {
- if ((a&((ub4)1<<i)) != (b&((ub4)1<<i))) break;
- }
- /* h2b: 4,6 */
- sprintf(final->line[0], " ub4 rsl = ((val << %ld) & 1);\n", i);
-}
-
-
-
-/*
- * find the value to xor to a and b and c to make none of them 3
- * assert, (a,b,c) are three distinct values in (0,1,2,3).
- */
-static ub4 find_adder(a,b,c)
-ub4 a;
-ub4 b;
-ub4 c;
-{
- return (a^b^c^3);
-}
-
-
-
-/*
- * Find a perfect hash when there are only three keys. Max 6 instructions.
- *
- * keys a,b,c.
- * There exists bit i such that a[i] != b[i].
- * Either c[i] != a[i] or c[i] != b[i], assume c[i] != a[i].
- * There exists bit j such that b[j] != c[j]. Note i != j.
- * Final hash should be no longer than val[i]^val[j].
- *
- * A minimal perfect hash needs to xor one of 0,1,2,3 afterwards to cause
- * the hole to land on 3. find_adder() finds that constant
- */
-static void hexthree(keys, final, form)
-key *keys;
-gencode *final;
-hashform *form;
-{
- ub4 a = keys->hash_k;
- ub4 b = keys->next_k->hash_k;
- ub4 c = keys->next_k->next_k->hash_k;
- ub4 i,j,x,y,z;
-
- final->used = 1;
-
- if (a == b || a == c || b == c)
- {
- printf("fatal error: duplicate keys\n");
- exit(SUCCESS);
- }
-
- /* one instruction */
- x = a&3;
- y = b&3;
- z = c&3;
- if (x != y && x != z && y != z)
- {
- if (form->perfect == NORMAL_HP || (x != 3 && y != 3 && z != 3))
- {
- /* h3a: 0,1,2 */
- sprintf(final->line[0], " ub4 rsl = (val & 3);\n");
- }
- else
- {
- /* h3b: 0,3,2 */
- sprintf(final->line[0], " ub4 rsl = ((val & 3) ^ %d);\n",
- find_adder(x,y,z));
- }
- return;
- }
-
- x = a>>(UB4BITS-2);
- y = b>>(UB4BITS-2);
- z = c>>(UB4BITS-2);
- if (x != y && x != z && y != z)
- {
- if (form->perfect == NORMAL_HP || (x != 3 && y != 3 && z != 3))
- {
- /* h3c: 3fffffff, 7fffffff, bfffffff */
- sprintf(final->line[0], " ub4 rsl = (val >> %ld);\n", (ub4)(UB4BITS-2));
- }
- else
- {
- /* h3d: 7fffffff, bfffffff, ffffffff */
- sprintf(final->line[0], " ub4 rsl = ((val >> %ld) ^ %ld);\n",
- (ub4)(UB4BITS-2), find_adder(x,y,z));
- }
- return;
- }
-
- /* two instructions */
- for (i=0; i<final->highbit; ++i)
- {
- x = (a>>i)&3;
- y = (b>>i)&3;
- z = (c>>i)&3;
- if (x != y && x != z && y != z)
- {
- if (form->perfect == NORMAL_HP || (x != 3 && y != 3 && z != 3))
- {
- /* h3e: ffff3fff, ffff7fff, ffffbfff */
- sprintf(final->line[0], " ub4 rsl = ((val >> %ld) & 3);\n", i);
- }
- else
- {
- /* h3f: ffff7fff, ffffbfff, ffffffff */
- sprintf(final->line[0], " ub4 rsl = (((val >> %ld) & 3) ^ %ld);\n", i,
- find_adder(x,y,z));
- }
- return;
- }
- }
-
- /* three instructions */
- for (i=0; i<=final->highbit; ++i)
- {
- x = (a+(a>>i))&3;
- y = (b+(b>>i))&3;
- z = (c+(c>>i))&3;
- if (x != y && x != z && y != z)
- {
- if (form->perfect == NORMAL_HP || (x != 3 && y != 3 && z != 3))
- {
- /* h3g: 0x000, 0x001, 0x100 */
- sprintf(final->line[0], " ub4 rsl = ((val+(val>>%ld))&3);\n", i);
- }
- else
- {
- /* h3h: 0x001, 0x100, 0x101 */
- sprintf(final->line[0], " ub4 rsl = (((val+(val>>%ld))&3)^%ld);\n", i,
- find_adder(x,y,z));
- }
- return;
- }
- }
-
- /*
- * Four instructions: I can prove this will always work.
- *
- * If the three values are distinct, there are two bits which
- * distinguish them. Choose the two such bits that are closest together.
- * If those bits are values 001 and 100 for those three values,
- * then there either aren't any bits in between
- * or the in-between bits aren't valued 001, 110, 100, 011, 010, or 101,
- * because that would violate the closest-together assumption.
- * So any in-between bits must be 000 or 111, and of 000 and 111 with
- * the distinguishing bits won't cause them to stop being distinguishing.
- */
- for (i=final->lowbit; i<=final->highbit; ++i)
- {
- for (j=i; j<=final->highbit; ++j)
- {
- x = ((a>>i)^(a>>j))&3;
- y = ((b>>i)^(b>>j))&3;
- z = ((c>>i)^(c>>j))&3;
- if (x != y && x != z && y != z)
- {
- if (form->perfect == NORMAL_HP || (x != 3 && y != 3 && z != 3))
- {
- /* h3i: 0x00, 0x04, 0x10 */
- sprintf(final->line[0],
- " ub4 rsl = (((val>>%ld) ^ (val>>%ld)) & 3);\n", i, j);
- }
- else
- {
- /* h3j: 0x04, 0x10, 0x14 */
- sprintf(final->line[0],
- " ub4 rsl = ((((val>>%ld) ^ (val>>%ld)) & 3) ^ %ld);\n",
- i, j, find_adder(x,y,z));
- }
- return;
- }
- }
- }
-
- printf("fatal error: hexthree\n");
- exit(SUCCESS);
-}
-
-
-
-/*
- * Check that a,b,c,d are some permutation of 0,1,2,3
- * Assume that a,b,c,d are all have values less than 32.
- */
-static int testfour(a,b,c,d)
-ub4 a;
-ub4 b;
-ub4 c;
-ub4 d;
-{
- ub4 mask = (1<<a)^(1<<b)^(1<<c)^(1<<d);
- return (mask == 0xf);
-}
-
-
-
-/*
- * Find a perfect hash when there are only four keys. Max 10 instructions.
- * Note that a perfect hash for 4 keys will automatically be minimal.
- */
-static void hexfour(keys, final)
-key *keys;
-gencode *final;
-{
- ub4 a = keys->hash_k;
- ub4 b = keys->next_k->hash_k;
- ub4 c = keys->next_k->next_k->hash_k;
- ub4 d = keys->next_k->next_k->next_k->hash_k;
- ub4 w,x,y,z;
- ub4 i,j,k;
-
- if (a==b || a==c || a==d || b==c || b==d || c==d)
- {
- printf("fatal error: Duplicate keys\n");
- exit(SUCCESS);
- }
-
- final->used = 1;
-
- /* one instruction */
- if ((final->diffbits & 3) == 3)
- {
- w = a&3;
- x = b&3;
- y = c&3;
- z = d&3;
- if (testfour(w,x,y,z))
- {
- sprintf(final->line[0], " ub4 rsl = (val & 3);\n"); /* h4a: 0,1,2,3 */
- return;
- }
- }
-
- if (((final->diffbits >> (UB4BITS-2)) & 3) == 3)
- {
- w = a>>(UB4BITS-2);
- x = b>>(UB4BITS-2);
- y = c>>(UB4BITS-2);
- z = d>>(UB4BITS-2);
- if (testfour(w,x,y,z))
- { /* h4b: 0fffffff, 4fffffff, 8fffffff, cfffffff */
- sprintf(final->line[0], " ub4 rsl = (val >> %ld);\n", (ub4)(UB4BITS-2));
- return;
- }
- }
-
- /* two instructions */
- for (i=final->lowbit; i<final->highbit; ++i)
- {
- if (((final->diffbits >> i) & 3) == 3)
- {
- w = (a>>i)&3;
- x = (b>>i)&3;
- y = (c>>i)&3;
- z = (d>>i)&3;
- if (testfour(w,x,y,z))
- { /* h4c: 0,2,4,6 */
- sprintf(final->line[0], " ub4 rsl = ((val >> %ld) & 3);\n", i);
- return;
- }
- }
- }
-
- /* three instructions (linear with the number of diffbits) */
- if ((final->diffbits & 3) != 0)
- {
- for (i=final->lowbit; i<=final->highbit; ++i)
- {
- if (((final->diffbits >> i) & 3) != 0)
- {
- w = (a+(a>>i))&3;
- x = (b+(b>>i))&3;
- y = (c+(c>>i))&3;
- z = (d+(d>>i))&3;
- if (testfour(w,x,y,z))
- { /* h4d: 0,1,2,4 */
- sprintf(final->line[0],
- " ub4 rsl = ((val + (val >> %ld)) & 3);\n", i);
- return;
- }
-
- w = (a-(a>>i))&3;
- x = (b-(b>>i))&3;
- y = (c-(c>>i))&3;
- z = (d-(d>>i))&3;
- if (testfour(w,x,y,z))
- { /* h4e: 0,1,3,5 */
- sprintf(final->line[0],
- " ub4 rsl = ((val - (val >> %ld)) & 3);\n", i);
- return;
- }
-
- /* h4f: ((val>>k)-val)&3: redundant with h4e */
-
- w = (a^(a>>i))&3;
- x = (b^(b>>i))&3;
- y = (c^(c>>i))&3;
- z = (d^(d>>i))&3;
- if (testfour(w,x,y,z))
- { /* h4g: 3,4,5,8 */
- sprintf(final->line[0],
- " ub4 rsl = ((val ^ (val >> %ld)) & 3);\n", i);
- return;
- }
- }
- }
- }
-
- /* four instructions (linear with the number of diffbits) */
- if ((final->diffbits & 3) != 0)
- {
- for (i=final->lowbit; i<=final->highbit; ++i)
- {
- if ((((final->diffbits >> i) & 1) != 0) &&
- ((final->diffbits & 2) != 0))
- {
- w = (a&3)^((a>>i)&1);
- x = (b&3)^((b>>i)&1);
- y = (c&3)^((c>>i)&1);
- z = (d&3)^((d>>i)&1);
- if (testfour(w,x,y,z))
- { /* h4h: 1,2,6,8 */
- sprintf(final->line[0],
- " ub4 rsl = ((val & 3) ^ ((val >> %ld) & 1));\n", i);
- return;
- }
-
- w = (a&2)^((a>>i)&1);
- x = (b&2)^((b>>i)&1);
- y = (c&2)^((c>>i)&1);
- z = (d&2)^((d>>i)&1);
- if (testfour(w,x,y,z))
- { /* h4i: 1,2,8,a */
- sprintf(final->line[0],
- " ub4 rsl = ((val & 2) ^ ((val >> %ld) & 1));\n", i);
- return;
- }
- }
-
- if ((((final->diffbits >> i) & 2) != 0) &&
- ((final->diffbits & 1) != 0))
- {
- w = (a&3)^((a>>i)&2);
- x = (b&3)^((b>>i)&2);
- y = (c&3)^((c>>i)&2);
- z = (d&3)^((d>>i)&2);
- if (testfour(w,x,y,z))
- { /* h4j: 0,1,3,4 */
- sprintf(final->line[0],
- " ub4 rsl = ((val & 3) ^ ((val >> %ld) & 2));\n", i);
- return;
- }
-
- w = (a&1)^((a>>i)&2);
- x = (b&1)^((b>>i)&2);
- y = (c&1)^((c>>i)&2);
- z = (d&1)^((d>>i)&2);
- if (testfour(w,x,y,z))
- { /* h4k: 1,4,7,8 */
- sprintf(final->line[0],
- " ub4 rsl = ((val & 1) ^ ((val >> %ld) & 2));\n", i);
- return;
- }
- }
- }
- }
-
- /* four instructions (quadratic in the number of diffbits) */
- for (i=final->lowbit; i<=final->highbit; ++i)
- {
- if (((final->diffbits >> i) & 1) == 1)
- {
- for (j=final->lowbit; j<=final->highbit; ++j)
- {
- if (((final->diffbits >> j) & 3) != 0)
- {
- /* test + */
- w = ((a>>i)+(a>>j))&3;
- x = ((b>>i)+(a>>j))&3;
- y = ((c>>i)+(a>>j))&3;
- z = ((d>>i)+(a>>j))&3;
- if (testfour(w,x,y,z))
- { /* h4l: testcase? */
- sprintf(final->line[0],
- " ub4 rsl = (((val >> %ld) + (val >> %ld)) & 3);\n",
- i, j);
- return;
- }
-
- /* test - */
- w = ((a>>i)-(a>>j))&3;
- x = ((b>>i)-(a>>j))&3;
- y = ((c>>i)-(a>>j))&3;
- z = ((d>>i)-(a>>j))&3;
- if (testfour(w,x,y,z))
- { /* h4m: testcase? */
- sprintf(final->line[0],
- " ub4 rsl = (((val >> %ld) - (val >> %ld)) & 3);\n",
- i, j);
- return;
- }
-
- /* test ^ */
- w = ((a>>i)^(a>>j))&3;
- x = ((b>>i)^(a>>j))&3;
- y = ((c>>i)^(a>>j))&3;
- z = ((d>>i)^(a>>j))&3;
- if (testfour(w,x,y,z))
- { /* h4n: testcase? */
- sprintf(final->line[0],
- " ub4 rsl = (((val >> %ld) ^ (val >> %ld)) & 3);\n",
- i, j);
- return;
- }
- }
- }
- }
- }
-
- /* five instructions (quadratic in the number of diffbits) */
- for (i=final->lowbit; i<=final->highbit; ++i)
- {
- if (((final->diffbits >> i) & 1) != 0)
- {
- for (j=final->lowbit; j<=final->highbit; ++j)
- {
- if (((final->diffbits >> j) & 3) != 0)
- {
- w = ((a>>j)&3)^((a>>i)&1);
- x = ((b>>j)&3)^((b>>i)&1);
- y = ((c>>j)&3)^((c>>i)&1);
- z = ((d>>j)&3)^((d>>i)&1);
- if (testfour(w,x,y,z))
- { /* h4o: 0,4,8,a */
- sprintf(final->line[0],
- " ub4 rsl = (((val >> %ld) & 3) ^ ((val >> %ld) & 1));\n",
- j, i);
- return;
- }
-
- w = ((a>>j)&2)^((a>>i)&1);
- x = ((b>>j)&2)^((b>>i)&1);
- y = ((c>>j)&2)^((c>>i)&1);
- z = ((d>>j)&2)^((d>>i)&1);
- if (testfour(w,x,y,z))
- { /* h4p: 0x04, 0x08, 0x10, 0x14 */
- sprintf(final->line[0],
- " ub4 rsl = (((val >> %ld) & 2) ^ ((val >> %ld) & 1));\n",
- j, i);
- return;
- }
- }
-
- if (i==0)
- {
- w = ((a>>j)^(a<<1))&3;
- x = ((b>>j)^(b<<1))&3;
- y = ((c>>j)^(c<<1))&3;
- z = ((d>>j)^(d<<1))&3;
- }
- else
- {
- w = ((a>>j)&3)^((a>>(i-1))&2);
- x = ((b>>j)&3)^((b>>(i-1))&2);
- y = ((c>>j)&3)^((c>>(i-1))&2);
- z = ((d>>j)&3)^((d>>(i-1))&2);
- }
- if (testfour(w,x,y,z))
- {
- if (i==0) /* h4q: 0,4,5,8 */
- {
- sprintf(final->line[0],
- " ub4 rsl = (((val >> %ld) ^ (val << 1)) & 3);\n",
- j);
- }
- else if (i==1) /* h4r: 0x01,0x09,0x0b,0x10 */
- {
- sprintf(final->line[0],
- " ub4 rsl = (((val >> %ld) & 3) ^ (val & 2));\n",
- j);
- }
- else /* h4s: 0,2,6,8 */
- {
- sprintf(final->line[0],
- " ub4 rsl = (((val >> %ld) & 3) ^ ((val >> %ld) & 2));\n",
- j, (i-1));
- }
- return;
- }
-
- w = ((a>>j)&1)^((a>>i)&2);
- x = ((b>>j)&1)^((b>>i)&2);
- y = ((c>>j)&1)^((c>>i)&2);
- z = ((d>>j)&1)^((d>>i)&2);
- if (testfour(w,x,y,z)) /* h4t: 0x20,0x14,0x10,0x06 */
- {
- sprintf(final->line[0],
- " ub4 rsl = (((val >> %ld) & 1) ^ ((val >> %ld) & 2));\n",
- j, i);
- return;
- }
- }
- }
- }
-
- /*
- * OK, bring out the big guns.
- * There exist three bits i,j,k which distinguish a,b,c,d.
- * i^(j<<1)^(k*q) is guaranteed to work for some q in {0,1,2,3},
- * proven by exhaustive search of all (8 choose 4) cases.
- * Find three such bits and try the 4 cases.
- * Linear with the number of diffbits.
- * Some cases below may duplicate some cases above. I did it that way
- * so that what is below is guaranteed to work, no matter what was
- * attempted above.
- * The generated hash is at most 10 instructions.
- */
- for (i=final->lowbit; i<UB4BITS; ++i)
- {
- y = (c>>i)&1;
- z = (d>>i)&1;
- if (y != z)
- break;
- }
-
- for (j=final->lowbit; j<UB4BITS; ++j)
- {
- x = ((b>>i)&1)^(((b>>j)&1)<<1);
- y = ((c>>i)&1)^(((c>>j)&1)<<1);
- z = ((d>>i)&1)^(((d>>j)&1)<<1);
- if (x != y && x != z && y != z)
- break;
- }
-
- for (k=final->lowbit; k<UB4BITS; ++k)
- {
- w = ((a>>i)&1)^(((a>>j)&1)<<1)^(((a>>k)&1)<<2);
- x = ((b>>i)&1)^(((b>>j)&1)<<1)^(((b>>k)&1)<<2);
- y = ((c>>i)&1)^(((c>>j)&1)<<1)^(((c>>k)&1)<<2);
- z = ((d>>i)&1)^(((d>>j)&1)<<1)^(((d>>k)&1)<<2);
- if (w != x && w != y && w != z && x != y && x != z && y != z)
- break;
- }
-
- /* Assert: bits i,j,k were found which distinguish a,b,c,d */
- if (i==UB4BITS || j==UB4BITS || k==UB4BITS)
- {
- printf("Fatal error: hexfour(), i %ld j %ld k %ld\n", i,j,k);
- exit(SUCCESS);
- }
-
- /* now try the four cases */
- {
- ub4 m,n,o,p;
-
- /* if any bit has two 1s and two 0s, make that bit o */
- if (((a>>i)&1)+((b>>i)&1)+((c>>i)&1)+((d>>i)&1) != 2)
- { m=j; n=k; o=i; }
- else if (((a>>j)&1)+((b>>j)&1)+((c>>j)&1)+((d>>j)&1) != 2)
- { m=i; n=k; o=j; }
- else
- { m=i; n=j; o=k; }
- if (m > n) {p=m; m=n; n=p; } /* guarantee m < n */
-
- /* printf("m %ld n %ld o %ld %ld %ld %ld %ld\n", m, n, o, w,x,y,z); */
-
- /* seven instructions, multiply bit o by 1 */
- w = (((a>>m)^(a>>o))&1)^((a>>(n-1))&2);
- x = (((b>>m)^(b>>o))&1)^((b>>(n-1))&2);
- y = (((c>>m)^(c>>o))&1)^((c>>(n-1))&2);
- z = (((d>>m)^(d>>o))&1)^((d>>(n-1))&2);
- if (testfour(w,x,y,z))
- {
- if (m>o) {p=m; m=o; o=p;} /* make sure m < o and m < n */
-
- if (m==0) /* 0,2,8,9 */
- {
- sprintf(final->line[0],
- " ub4 rsl = (((val^(val>>%ld))&1)^((val>>%ld)&2));\n", o, n-1);
- }
- else /* 0x00,0x04,0x10,0x12 */
- {
- sprintf(final->line[0],
- " ub4 rsl = ((((val>>%ld) ^ (val>>%ld)) & 1) ^ ((val>>%ld) & 2));\n",
- m, o, n-1);
- }
- return;
- }
-
- /* six to seven instructions, multiply bit o by 2 */
- w = ((a>>m)&1)^((((a>>n)^(a>>o))&1)<<1);
- x = ((b>>m)&1)^((((b>>n)^(b>>o))&1)<<1);
- y = ((c>>m)&1)^((((c>>n)^(c>>o))&1)<<1);
- z = ((d>>m)&1)^((((d>>n)^(d>>o))&1)<<1);
- if (testfour(w,x,y,z))
- {
- if (m==o-1) {p=n; n=o; o=p;} /* make m==n-1 if possible */
-
- if (m==0) /* 0,1,5,8 */
- {
- sprintf(final->line[0],
- " ub4 rsl = ((val & 1) ^ (((val>>%ld) ^ (val>>%ld)) & 2));\n",
- n-1, o-1);
- }
- else if (o==0) /* 0x00,0x04,0x05,0x10 */
- {
- sprintf(final->line[0],
- " ub4 rsl = (((val>>%ld) & 2) ^ (((val>>%ld) ^ val) & 1));\n",
- m-1, n);
- }
- else /* 0x00,0x02,0x0a,0x10 */
- {
- sprintf(final->line[0],
- " ub4 rsl = (((val>>%ld) & 1) ^ (((val>>%ld) ^ (val>>%ld)) & 2));\n",
- m, n-1, o-1);
- }
- return;
- }
-
- /* multiplying by 3 is a pain: seven or eight instructions */
- w = (((a>>m)&1)^((a>>(n-1))&2))^((a>>o)&1)^(((a>>o)&1)<<1);
- x = (((b>>m)&1)^((b>>(n-1))&2))^((b>>o)&1)^(((b>>o)&1)<<1);
- y = (((c>>m)&1)^((c>>(n-1))&2))^((c>>o)&1)^(((c>>o)&1)<<1);
- z = (((d>>m)&1)^((d>>(n-1))&2))^((d>>o)&1)^(((d>>o)&1)<<1);
- if (testfour(w,x,y,z))
- {
- final->used = 2;
- sprintf(final->line[0], " ub4 b = (val >> %ld) & 1;\n", o);
- if (m==o-1 && m==0) /* 0x02,0x10,0x11,0x18 */
- {
- sprintf(final->line[1],
- " ub4 rsl = ((val & 3) ^ ((val >> %ld) & 2) ^ b);\n", n-1);
- }
- else if (m==o-1) /* 0,4,6,c */
- {
- sprintf(final->line[1],
- " ub4 rsl = (((val >> %ld) & 3) ^ ((val >> %ld) & 2) ^ b);\n",
- m, n-1);
- }
- else if (m==n-1 && m==0) /* 02,0a,0b,18 */
- {
- sprintf(final->line[1],
- " ub4 rsl = ((val & 3) ^ b ^ (b << 1));\n");
- }
- else if (m==n-1) /* 0,2,4,8 */
- {
- sprintf(final->line[1],
- " ub4 rsl = (((val >> %ld) & 3) ^ b ^ (b << 1));\n", m);
- }
- else if (o==n-1 && m==0) /* h4am: not reached */
- {
- sprintf(final->line[1],
- " ub4 rsl = ((val & 1) ^ ((val >> %ld) & 3) ^ (b <<1 ));\n",
- o);
- }
- else if (o==n-1) /* 0x00,0x02,0x08,0x10 */
- {
- sprintf(final->line[1],
- " ub4 rsl = (((val >> %ld) & 1) ^ ((val >> %ld) & 3) ^ (b << 1));\n",
- m, o);
- }
- else if ((m != o-1) && (m != n-1) && (o != m-1) && (o != n-1))
- {
- final->used = 3;
- sprintf(final->line[0], " ub4 newval = val & 0x%lx;\n",
- (((ub4)1<<m)^((ub4)1<<n)^((ub4)1<<o)));
- if (o==0) /* 0x00,0x01,0x04,0x10 */
- {
- sprintf(final->line[1], " ub4 b = -newval;\n");
- }
- else /* 0x00,0x04,0x09,0x10 */
- {
- sprintf(final->line[1], " ub4 b = -(newval >> %ld);\n", o);
- }
- if (m==0) /* 0x00,0x04,0x09,0x10 */
- {
- sprintf(final->line[2],
- " ub4 rsl = ((newval ^ (newval>>%ld) ^ b) & 3);\n", n-1);
- }
- else /* 0x00,0x03,0x04,0x10 */
- {
- sprintf(final->line[2],
- " ub4 rsl = (((newval>>%ld) ^ (newval>>%ld) ^ b) & 3);\n",
- m, n-1);
- }
- }
- else if (o == m-1)
- {
- if (o==0) /* 0x02,0x03,0x0a,0x10 */
- {
- sprintf(final->line[0], " ub4 b = (val<<1) & 2;\n");
- }
- else if (o==1) /* 0x00,0x02,0x04,0x10 */
- {
- sprintf(final->line[0], " ub4 b = val & 2;\n");
- }
- else /* 0x00,0x04,0x08,0x20 */
- {
- sprintf(final->line[0], " ub4 b = (val>>%ld) & 2;\n", o-1);
- }
-
- if (o==0) /* 0x02,0x03,0x0a,0x10 */
- {
- sprintf(final->line[1],
- " ub4 rsl = ((val & 3) ^ ((val>>%ld) & 1) ^ b);\n",
- n);
- }
- else /* 0x00,0x02,0x04,0x10 */
- {
- sprintf(final->line[1],
- " ub4 rsl = (((val>>%ld) & 3) ^ ((val>>%ld) & 1) ^ b);\n",
- o, n);
- }
- }
- else /* h4ax: 10 instructions, but not reached */
- {
- sprintf(final->line[1],
- " ub4 rsl = (((val>>%ld) & 1) ^ ((val>>%ld) & 2) ^ b ^ (b<<1));\n",
- m, n-1);
- }
-
- return;
- }
-
- /* five instructions, multiply bit o by 0, covered before the big guns */
- w = ((a>>m)&1)^(a>>(n-1)&2);
- x = ((b>>m)&1)^(b>>(n-1)&2);
- y = ((c>>m)&1)^(c>>(n-1)&2);
- z = ((d>>m)&1)^(d>>(n-1)&2);
- if (testfour(w,x,y,z))
- { /* h4v, not reached */
- sprintf(final->line[0],
- " ub4 rsl = (((val>>%ld) & 1) ^ ((val>>%ld) & 2));\n", m, n-1);
- return;
- }
- }
-
- printf("fatal error: bug in hexfour!\n");
- exit(SUCCESS);
- return;
-}
-
-
-/* test if a_k is distinct and in range for all keys */
-static int testeight(keys, badmask)
-key *keys; /* keys being hashed */
-ub1 badmask; /* used for minimal perfect hashing */
-{
- ub1 mask = badmask;
- key *mykey;
-
- for (mykey=keys; mykey; mykey=mykey->next_k)
- {
- if (bit(mask, 1<<mykey->a_k)) return FALSE;
- bis(mask, 1<<mykey->a_k);
- }
- return TRUE;
-}
-
-
-
-/*
- * Try to find a perfect hash when there are five to eight keys.
- *
- * We can't deterministically find a perfect hash, but there's a reasonable
- * chance we'll get lucky. Give it a shot. Return TRUE if we succeed.
- */
-static int hexeight(keys, nkeys, final, form)
-key *keys;
-ub4 nkeys;
-gencode *final;
-hashform *form;
-{
- key *mykey; /* walk through the keys */
- ub4 i,j,k;
- ub1 badmask;
-
- printf("hexeight\n");
-
- /* what hash values should never be used? */
- badmask = 0;
- if (form->perfect == MINIMAL_HP)
- {
- for (i=nkeys; i<8; ++i)
- bis(badmask,(1<<i));
- }
-
- /* one instruction */
- for (mykey=keys; mykey; mykey=mykey->next_k)
- mykey->a_k = mykey->hash_k & 7;
- if (testeight(keys, badmask))
- { /* h8a */
- final->used = 1;
- sprintf(final->line[0], " ub4 rsl = (val & 7);\n");
- return TRUE;
- }
-
- /* two instructions */
- for (i=final->lowbit; i<=final->highbit-2; ++i)
- {
- for (mykey=keys; mykey; mykey=mykey->next_k)
- mykey->a_k = (mykey->hash_k >> i) & 7;
- if (testeight(keys, badmask))
- { /* h8b */
- final->used = 1;
- sprintf(final->line[0], " ub4 rsl = ((val >> %ld) & 7);\n", i);
- return TRUE;
- }
- }
-
- /* four instructions */
- for (i=final->lowbit; i<=final->highbit; ++i)
- {
- for (j=i+1; j<=final->highbit; ++j)
- {
- for (mykey=keys; mykey; mykey=mykey->next_k)
- mykey->a_k = ((mykey->hash_k >> i)+(mykey->hash_k >> j)) & 7;
- if (testeight(keys, badmask))
- {
- final->used = 1;
- if (i == 0) /* h8c */
- sprintf(final->line[0],
- " ub4 rsl = ((val + (val >> %ld)) & 7);\n", j);
- else /* h8d */
- sprintf(final->line[0],
- " ub4 rsl = (((val >> %ld) + (val >> %ld)) & 7);\n", i, j);
- return TRUE;
- }
-
- for (mykey=keys; mykey; mykey=mykey->next_k)
- mykey->a_k = ((mykey->hash_k >> i)^(mykey->hash_k >> j)) & 7;
- if (testeight(keys, badmask))
- {
- final->used = 1;
- if (i == 0) /* h8e */
- sprintf(final->line[0],
- " ub4 rsl = ((val ^ (val >> %ld)) & 7);\n", j);
- else /* h8f */
- sprintf(final->line[0],
- " ub4 rsl = (((val >> %ld) ^ (val >> %ld)) & 7);\n", i, j);
-
- return TRUE;
- }
-
- for (mykey=keys; mykey; mykey=mykey->next_k)
- mykey->a_k = ((mykey->hash_k >> i)-(mykey->hash_k >> j)) & 7;
- if (testeight(keys, badmask))
- {
- final->used = 1;
- if (i == 0) /* h8g */
- sprintf(final->line[0],
- " ub4 rsl = ((val - (val >> %ld)) & 7);\n", j);
- else /* h8h */
- sprintf(final->line[0],
- " ub4 rsl = (((val >> %ld) - (val >> %ld)) & 7);\n", i, j);
-
- return TRUE;
- }
- }
- }
-
-
- /* six instructions */
- for (i=final->lowbit; i<=final->highbit; ++i)
- {
- for (j=i+1; j<=final->highbit; ++j)
- {
- for (k=j+1; k<=final->highbit; ++k)
- {
- for (mykey=keys; mykey; mykey=mykey->next_k)
- mykey->a_k = ((mykey->hash_k >> i) +
- (mykey->hash_k >> j) +
- (mykey->hash_k >> k)) & 7;
- if (testeight(keys, badmask))
- { /* h8i */
- final->used = 1;
- sprintf(final->line[0],
- " ub4 rsl = (((val >> %ld) + (val >> %ld) + (val >> %ld)) & 7);\n",
- i, j, k);
- return TRUE;
- }
- }
- }
- }
-
-
- return FALSE;
-}
-
-
-
-/*
- * Guns aren't enough. Bring out the Bomb. Use tab[].
- * This finds the initial (a,b) when we need to use tab[].
- *
- * We need to produce a different (a,b) every time this is called. Try all
- * reasonable cases, fastest first.
- *
- * The initial mix (which this determines) can be filled into final starting
- * at line[1]. val is set and a,b are declared. The final hash (at line[7])
- * is a^tab[b] or a^scramble[tab[b]].
- *
- * The code will probably look like this, minus some stuff:
- * val += CONSTANT;
- * val ^= (val<<16);
- * val += (val>>8);
- * val ^= (val<<4);
- * b = (val >> l) & 7;
- * a = (val + (val<<m)) >> 29;
- * return a^scramble[tab[b]];
- * Note that *a* and tab[b] will be computed in parallel by most modern chips.
- *
- * final->i is the current state of the state machine.
- * final->j and final->k are counters in the loops the states simulate.
- */
-static void hexn(keys, salt, alen, blen, final)
-key *keys;
-ub4 salt;
-ub4 alen;
-ub4 blen;
-gencode *final;
-{
- key *mykey;
- ub4 highbit = final->highbit;
- ub4 lowbit = final->lowbit;
- ub4 alog = mylog2(alen);
- ub4 blog = mylog2(blen);
-
- for (;;)
- {
- switch(final->i)
- {
- case 1:
- /* a = val>>30; b=val&3 */
- for (mykey=keys; mykey; mykey=mykey->next_k)
- {
- mykey->a_k = (mykey->hash_k << (UB4BITS-(highbit+1)))>>(UB4BITS-alog);
- mykey->b_k = (mykey->hash_k >> lowbit) & (blen-1);
- }
- if (lowbit == 0) /* hna */
- sprintf(final->line[5], " b = (val & 0x%lx);\n",
- blen-1);
- else /* hnb */
- sprintf(final->line[5], " b = ((val >> %ld) & 0x%lx);\n",
- lowbit, blen-1);
- if (highbit+1 == UB4BITS) /* hnc */
- sprintf(final->line[6], " a = (val >> %ld);\n",
- UB4BITS-alog);
- else /* hnd */
- sprintf(final->line[6], " a = ((val << %ld ) >> %ld);\n",
- UB4BITS-(highbit+1), UB4BITS-alog);
-
- ++final->i;
- return;
-
- case 2:
- /* a = val&3; b=val>>30 */
- for (mykey=keys; mykey; mykey=mykey->next_k)
- {
- mykey->a_k = (mykey->hash_k >> lowbit) & (alen-1);
- mykey->b_k = (mykey->hash_k << (UB4BITS-(highbit+1)))>>(UB4BITS-blog);
- }
- if (highbit+1 == UB4BITS) /* hne */
- sprintf(final->line[5], " b = (val >> %ld);\n",
- UB4BITS-blog);
- else /* hnf */
- sprintf(final->line[5], " b = ((val << %ld ) >> %ld);\n",
- UB4BITS-(highbit+1), UB4BITS-blog);
- if (lowbit == 0) /* hng */
- sprintf(final->line[6], " a = (val & 0x%lx);\n",
- alen-1);
- else /* hnh */
- sprintf(final->line[6], " a = ((val >> %ld) & 0x%lx);\n",
- lowbit, alen-1);
-
- ++final->i;
- return;
-
- case 3:
- /*
- * cases 3,4,5:
- * for (k=lowbit; k<=highbit; ++k)
- * for (j=lowbit; j<=highbit; ++j)
- * b = (val>>j)&3;
- * a = (val<<k)>>30;
- */
- final->k = lowbit;
- final->j = lowbit;
- ++final->i;
- break;
-
- case 4:
- if (!(final->j < highbit))
- {
- ++final->i;
- break;
- }
- for (mykey=keys; mykey; mykey=mykey->next_k)
- {
- mykey->b_k = (mykey->hash_k >> (final->j)) & (blen-1);
- mykey->a_k = (mykey->hash_k << (UB4BITS-final->k-1)) >> (UB4BITS-alog);
- }
- if (final->j == 0) /* hni */
- sprintf(final->line[5], " b = val & 0x%lx;\n",
- blen-1);
- else if (blog+final->j == UB4BITS) /* hnja */
- sprintf(final->line[5], " b = val >> %ld;\n",
- final->j);
- else
- sprintf(final->line[5], " b = (val >> %ld) & 0x%lx;\n", /* hnj */
- final->j, blen-1);
- if (UB4BITS-final->k-1 == 0) /* hnk */
- sprintf(final->line[6], " a = (val >> %ld);\n",
- UB4BITS-alog);
- else /* hnl */
- sprintf(final->line[6], " a = ((val << %ld) >> %ld);\n",
- UB4BITS-final->k-1, UB4BITS-alog);
- while (++final->j < highbit)
- {
- if (((final->diffbits>>(final->j)) & (blen-1)) > 2)
- break;
- }
- return;
-
- case 5:
- while (++final->k < highbit)
- {
- if ((((final->diffbits<<(UB4BITS-final->k-1))>>alog) & (alen-1)) > 0)
- break;
- }
- if (!(final->k < highbit))
- {
- ++final->i;
- break;
- }
- final->j = lowbit;
- final->i = 4;
- break;
-
-
- case 6:
- /*
- * cases 6,7,8:
- * for (k=0; k<UB4BITS-alog; ++k)
- * for (j=0; j<UB4BITS-blog; ++j)
- * val = val+f(salt);
- * val ^= (val >> 16);
- * val += (val << 8);
- * val ^= (val >> 4);
- * b = (val >> j) & 3;
- * a = (val + (val << k)) >> 30;
- */
- final->k = 0;
- final->j = 0;
- ++final->i;
- break;
-
- case 7:
- /* Just do something that will surely work */
- {
- ub4 addk = 0x9e3779b9*salt;
-
- if (!(final->j <= UB4BITS-blog))
- {
- ++final->i;
- break;
- }
- for (mykey=keys; mykey; mykey=mykey->next_k)
- {
- ub4 val = mykey->hash_k + addk;
- if (final->highbit+1 - final->lowbit > 16)
- val ^= (val >> 16);
- if (final->highbit+1 - final->lowbit > 8)
- val += (val << 8);
- val ^= (val >> 4);
- mykey->b_k = (val >> final->j) & (blen-1);
- if (final->k == 0)
- mykey->a_k = val >> (UB4BITS-alog);
- else
- mykey->a_k = (val + (val << final->k)) >> (UB4BITS-alog);
- }
- sprintf(final->line[1], " val += 0x%lx;\n", addk);
- if (final->highbit+1 - final->lowbit > 16) /* hnm */
- sprintf(final->line[2], " val ^= (val >> 16);\n");
- if (final->highbit+1 - final->lowbit > 8) /* hnn */
- sprintf(final->line[3], " val += (val << 8);\n");
- sprintf(final->line[4], " val ^= (val >> 4);\n");
- if (final->j == 0) /* hno: don't know how to reach this */
- sprintf(final->line[5], " b = val & 0x%lx;\n", blen-1);
- else /* hnp */
- sprintf(final->line[5], " b = (val >> %ld) & 0x%lx;\n",
- final->j, blen-1);
- if (final->k == 0) /* hnq */
- sprintf(final->line[6], " a = val >> %ld;\n", UB4BITS-alog);
- else /* hnr */
- sprintf(final->line[6], " a = (val + (val << %ld)) >> %ld;\n",
- final->k, UB4BITS-alog);
-
- ++final->j;
- return;
- }
-
- case 8:
- ++final->k;
- if (!(final->k <= UB4BITS-alog))
- {
- ++final->i;
- break;
- }
- final->j = 0;
- final->i = 7;
- break;
-
- case 9:
- final->i = 6;
- break;
- }
- }
-}
-
-
-
-/* find the highest and lowest bit where any key differs */
-static void setlow(keys, final)
-key *keys;
-gencode *final;
-{
- ub4 lowbit;
- ub4 highbit;
- ub4 i;
- key *mykey;
- ub4 firstkey;
-
- /* mark the interesting bits in final->mask */
- final->diffbits = (ub4)0;
- if (keys) firstkey = keys->hash_k;
- for (mykey=keys; mykey!=(key *)0; mykey=mykey->next_k)
- final->diffbits |= (firstkey ^ mykey->hash_k);
-
- /* find the lowest interesting bit */
- for (i=0; i<UB4BITS; ++i)
- if (final->diffbits & (((ub4)1)<<i))
- break;
- final->lowbit = i;
-
- /* find the highest interesting bit */
- for (i=UB4BITS; --i; )
- if (final->diffbits & (((ub4)1)<<i))
- break;
- final->highbit = i;
-}
-
-/*
- * Initialize (a,b) when keys are integers.
- *
- * Normally there's an initial hash which produces a number. That hash takes
- * an initializer. Changing the initializer causes the initial hash to
- * produce a different (uniformly distributed) number without any extra work.
- *
- * Well, here we start with a number. There's no initial hash. Any mixing
- * costs extra work. So we go through a lot of special cases to minimize the
- * mixing needed to get distinct (a,b). For small sets of keys, it's often
- * fastest to skip the final hash and produce the perfect hash from the number
- * directly.
- *
- * The target user for this is switch statement optimization. The common case
- * is 3 to 16 keys, and instruction counts matter. The competition is a
- * binary tree of branches.
- *
- * Return TRUE if we found a perfect hash and no more work is needed.
- * Return FALSE if we just did an initial hash and more work is needed.
- */
-int inithex(keys, nkeys, alen, blen, smax, salt, final, form)
-key *keys; /* list of all keys */
-ub4 nkeys; /* number of keys to hash */
-ub4 alen; /* (a,b) has a in 0..alen-1, a power of 2 */
-ub4 blen; /* (a,b) has b in 0..blen-1, a power of 2 */
-ub4 smax; /* maximum range of computable hash values */
-ub4 salt; /* used to initialize the hash function */
-gencode *final; /* output, code for the final hash */
-hashform *form; /* user directives */
-{
- setlow(keys, final);
-
- switch (nkeys)
- {
- case 1:
- hexone(keys, final);
- return TRUE;
- case 2:
- hextwo(keys, final);
- return TRUE;
- case 3:
- hexthree(keys, final, form);
- return TRUE;
- case 4:
- hexfour(keys, final);
- return TRUE;
- case 5: case 6: case 7: case 8:
- if (salt == 1 && /* first time through */
- hexeight(keys, nkeys, final, form)) /* get lucky, don't need tab[] ? */
- return TRUE;
- /* fall through */
- default:
- if (salt == 1)
- {
- final->used = 8;
- final->i = 1;
- final->j = final->k = final->l = final->m = final->n = final->o = 0;
- sprintf(final->line[0], " ub4 a, b, rsl;\n");
- sprintf(final->line[1], "\n");
- sprintf(final->line[2], "\n");
- sprintf(final->line[3], "\n");
- sprintf(final->line[4], "\n");
- sprintf(final->line[5], "\n");
- sprintf(final->line[6], "\n");
- if (blen < USE_SCRAMBLE)
- { /* hns */
- sprintf(final->line[7], " rsl = (a^tab[b]);\n");
- }
- else
- { /* hnt */
- sprintf(final->line[7], " rsl = (a^scramble[tab[b]]);\n");
- }
- }
- hexn(keys, salt, alen, blen, final);
- return FALSE;
- }
-}
diff --git a/tools/codegen/core/perfect/recycle.c b/tools/codegen/core/perfect/recycle.c
deleted file mode 100644
index 3f857cba7d..0000000000
--- a/tools/codegen/core/perfect/recycle.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
---------------------------------------------------------------------
-By Bob Jenkins, September 1996. recycle.c
-You may use this code in any way you wish, and it is free. No warranty.
-
-This manages memory for commonly-allocated structures.
-It allocates RESTART to REMAX items at a time.
-Timings have shown that, if malloc is used for every new structure,
- malloc will consume about 90% of the time in a program. This
- module cuts down the number of mallocs by an order of magnitude.
-This also decreases memory fragmentation, and freeing structures
- only requires freeing the root.
---------------------------------------------------------------------
-*/
-
-#ifndef STANDARD
-# include "standard.h"
-#endif
-#ifndef RECYCLE
-# include "recycle.h"
-#endif
-
-reroot *remkroot(size)
-size_t size;
-{
- reroot *r = (reroot *)remalloc(sizeof(reroot), "recycle.c, root");
- r->list = (recycle *)0;
- r->trash = (recycle *)0;
- r->size = align(size);
- r->logsize = RESTART;
- r->numleft = 0;
- return r;
-}
-
-void refree(r)
-struct reroot *r;
-{
- recycle *temp;
- if (temp = r->list) while (r->list)
- {
- temp = r->list->next;
- free((char *)r->list);
- r->list = temp;
- }
- free((char *)r);
- return;
-}
-
-/* to be called from the macro renew only */
-char *renewx(r)
-struct reroot *r;
-{
- recycle *temp;
- if (r->trash)
- { /* pull a node off the trash heap */
- temp = r->trash;
- r->trash = temp->next;
- (void)memset((void *)temp, 0, r->size);
- }
- else
- { /* allocate a new block of nodes */
- r->numleft = r->size*((ub4)1<<r->logsize);
- if (r->numleft < REMAX) ++r->logsize;
- temp = (recycle *)remalloc(sizeof(recycle) + r->numleft,
- "recycle.c, data");
- temp->next = r->list;
- r->list = temp;
- r->numleft-=r->size;
- temp = (recycle *)((char *)(r->list+1)+r->numleft);
- }
- return (char *)temp;
-}
-
-char *remalloc(len, purpose)
-size_t len;
-char *purpose;
-{
- char *x = (char *)malloc(len);
- if (!x)
- {
- fprintf(stderr, "malloc of %d failed for %s\n",
- len, purpose);
- exit(SUCCESS);
- }
- return x;
-}
-
diff --git a/tools/codegen/core/perfect/recycle.h b/tools/codegen/core/perfect/recycle.h
deleted file mode 100644
index 7472495e84..0000000000
--- a/tools/codegen/core/perfect/recycle.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
---------------------------------------------------------------------
-By Bob Jenkins, September 1996. recycle.h
-You may use this code in any way you wish, and it is free. No warranty.
-
-This manages memory for commonly-allocated structures.
-It allocates RESTART to REMAX items at a time.
-Timings have shown that, if malloc is used for every new structure,
- malloc will consume about 90% of the time in a program. This
- module cuts down the number of mallocs by an order of magnitude.
-This also decreases memory fragmentation, and freeing all structures
- only requires freeing the root.
---------------------------------------------------------------------
-*/
-
-#ifndef STANDARD
-#include "standard.h"
-#endif
-
-#ifndef RECYCLE
-#define RECYCLE
-
-#define RESTART 0
-#define REMAX 32000
-
-struct recycle
-{
- struct recycle *next;
-};
-typedef struct recycle recycle;
-
-struct reroot
-{
- struct recycle *list; /* list of malloced blocks */
- struct recycle *trash; /* list of deleted items */
- size_t size; /* size of an item */
- size_t logsize; /* log_2 of number of items in a block */
- word numleft; /* number of bytes left in this block */
-};
-typedef struct reroot reroot;
-
-/* make a new recycling root */
-reroot *remkroot(/*_ size_t mysize _*/);
-
-/* free a recycling root and all the items it has made */
-void refree(/*_ struct reroot *r _*/);
-
-/* get a new (cleared) item from the root */
-#define renew(r) ((r)->numleft ? \
- (((char *)((r)->list+1))+((r)->numleft-=(r)->size)) : renewx(r))
-
-char *renewx(/*_ struct reroot *r _*/);
-
-/* delete an item; let the root recycle it */
-/* void redel(/o_ struct reroot *r, struct recycle *item _o/); */
-#define redel(root,item) { \
- ((recycle *)item)->next=(root)->trash; \
- (root)->trash=(recycle *)(item); \
-}
-
-/* malloc, but complain to stderr and exit program if no joy */
-/* use plain free() to free memory allocated by remalloc() */
-char *remalloc(/*_ size_t len, char *purpose _*/);
-
-#endif /* RECYCLE */
diff --git a/tools/codegen/core/perfect/run.sh b/tools/codegen/core/perfect/run.sh
deleted file mode 100755
index c0d1fc3b81..0000000000
--- a/tools/codegen/core/perfect/run.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -e
-cd $(dirname $0)
-fn=$1
-shift
-./perfect $* < $fn
diff --git a/tools/codegen/core/perfect/standard.h b/tools/codegen/core/perfect/standard.h
deleted file mode 100644
index 202a5d658c..0000000000
--- a/tools/codegen/core/perfect/standard.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
-------------------------------------------------------------------------------
-Standard definitions and types, Bob Jenkins
-------------------------------------------------------------------------------
-*/
-#ifndef STANDARD
-# define STANDARD
-# ifndef STDIO
-# include <stdio.h>
-# define STDIO
-# endif
-# ifndef STDDEF
-# include <stddef.h>
-# define STDDEF
-# endif
-typedef unsigned long long ub8;
-#define UB8MAXVAL 0xffffffffffffffffLL
-#define UB8BITS 64
-typedef signed long long sb8;
-#define SB8MAXVAL 0x7fffffffffffffffLL
-typedef unsigned long int ub4; /* unsigned 4-byte quantities */
-#define UB4MAXVAL 0xffffffff
-typedef signed long int sb4;
-#define UB4BITS 32
-#define SB4MAXVAL 0x7fffffff
-typedef unsigned short int ub2;
-#define UB2MAXVAL 0xffff
-#define UB2BITS 16
-typedef signed short int sb2;
-#define SB2MAXVAL 0x7fff
-typedef unsigned char ub1;
-#define UB1MAXVAL 0xff
-#define UB1BITS 8
-typedef signed char sb1; /* signed 1-byte quantities */
-#define SB1MAXVAL 0x7f
-typedef int word; /* fastest type available */
-
-#define bis(target,mask) ((target) |= (mask))
-#define bic(target,mask) ((target) &= ~(mask))
-#define bit(target,mask) ((target) & (mask))
-#ifndef min
-# define min(a,b) (((a)<(b)) ? (a) : (b))
-#endif /* min */
-#ifndef max
-# define max(a,b) (((a)<(b)) ? (b) : (a))
-#endif /* max */
-#ifndef align
-# define align(a) (((ub4)a+(sizeof(void *)-1))&(~(sizeof(void *)-1)))
-#endif /* align */
-#ifndef abs
-# define abs(a) (((a)>0) ? (a) : -(a))
-#endif
-#define TRUE 1
-#define FALSE 0
-#define SUCCESS 0 /* 1 on VAX */
-
-#endif /* STANDARD */