aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib
diff options
context:
space:
mode:
authorGravatar puech <puech@85f007b7-540e-0410-9357-904b9bb8a0f7>2011-07-29 14:23:55 +0000
committerGravatar puech <puech@85f007b7-540e-0410-9357-904b9bb8a0f7>2011-07-29 14:23:55 +0000
commitd43147c7fd9b88af4ba42e5bf39e9c83f2c5beb2 (patch)
tree08856bf1d36e3153bdda541ffe8711679b3676a5 /lib
parent54e8894dd3a1787a89d094bb2917ac7cf72d322a (diff)
Term: Refactoring of hashconsing
- moved the alterate Hashtable module to a separate file - moved all hashconsing-related function to a separate section in Term git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@14312 85f007b7-540e-0410-9357-904b9bb8a0f7
Diffstat (limited to 'lib')
-rw-r--r--lib/hashtbl_alt.ml111
-rw-r--r--lib/hashtbl_alt.mli38
-rw-r--r--lib/lib.mllib1
3 files changed, 150 insertions, 0 deletions
diff --git a/lib/hashtbl_alt.ml b/lib/hashtbl_alt.ml
new file mode 100644
index 000000000..667f4f719
--- /dev/null
+++ b/lib/hashtbl_alt.ml
@@ -0,0 +1,111 @@
+(************************************************************************)
+(* v * The Coq Proof Assistant / The Coq Development Team *)
+(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2010 *)
+(* \VV/ **************************************************************)
+(* // * This file is distributed under the terms of the *)
+(* * GNU Lesser General Public License Version 2.1 *)
+(************************************************************************)
+
+(* The following module is a specialized version of [Hashtbl] that is
+ a better space saver. Actually, [Hashcons] instanciates [Hashtbl.t]
+ with [constr] used both as a key and as an image. Thus, in each
+ cell of the internal bucketlist, there are two representations of
+ the same value. In this implementation, there is only one.
+
+ Besides, the responsibility of computing the hash function is now
+ given to the caller, which makes possible the interleaving of the
+ hash key computation and the hash-consing. *)
+
+module type Hashtype = sig
+ type t
+ val equals : t -> t -> bool
+end
+
+module type S = sig
+ type elt
+ (* [may_add_and_get key constr] uses [key] to look for [constr]
+ in the hash table [H]. If [constr] is [H], returns the
+ specific representation that is stored in [H]. Otherwise,
+ [constr] is stored in [H] and will be used as the canonical
+ representation of this value in the future. *)
+ val may_add_and_get : int -> elt -> elt
+ val combine : int -> int -> int
+ val combinesmall : int -> int -> int
+ val combine3 : int -> int -> int -> int
+ val combine4 : int -> int -> int -> int -> int
+end
+
+module Make (E : Hashtype) =
+ struct
+
+ type elt = E.t
+
+ type bucketlist = Empty | Cons of elt * int * bucketlist
+
+ let initial_size = 19991
+ let table_data = ref (Array.make initial_size Empty)
+ let table_size = ref 0
+
+ let resize () =
+ let odata = !table_data in
+ let osize = Array.length odata in
+ let nsize = min (2 * osize + 1) Sys.max_array_length in
+ if nsize <> osize then begin
+ let ndata = Array.create nsize Empty in
+ let rec insert_bucket = function
+ | Empty -> ()
+ | Cons (key, hash, rest) ->
+ let nidx = hash mod nsize in
+ ndata.(nidx) <- Cons (key, hash, ndata.(nidx));
+ insert_bucket rest
+ in
+ for i = 0 to osize - 1 do insert_bucket odata.(i) done;
+ table_data := ndata
+ end
+
+ let add hash key =
+ let odata = !table_data in
+ let osize = Array.length odata in
+ let i = hash mod osize in
+ odata.(i) <- Cons (key, hash, odata.(i));
+ incr table_size;
+ if !table_size > osize lsl 1 then resize ()
+
+ let find_rec hash key bucket =
+ let rec aux = function
+ | Empty ->
+ add hash key; key
+ | Cons (k, h, rest) ->
+ if hash == h && E.equals key k then k else aux rest
+ in
+ aux bucket
+
+ let may_add_and_get hash key =
+ let odata = !table_data in
+ match odata.(hash mod (Array.length odata)) with
+ | Empty -> add hash key; key
+ | Cons (k1, h1, rest1) ->
+ if hash == h1 && E.equals key k1 then k1 else
+ match rest1 with
+ | Empty -> add hash key; key
+ | Cons (k2, h2, rest2) ->
+ if hash == h2 && E.equals key k2 then k2 else
+ match rest2 with
+ | Empty -> add hash key; key
+ | Cons (k3, h3, rest3) ->
+ if hash == h2 && E.equals key k3 then k3
+ else find_rec hash key rest3
+
+ (* These are helper functions to combine the hash keys in a similar
+ way as [Hashtbl.hash] does. The constants [alpha] and [beta] must
+ be prime numbers. There were chosen empirically. Notice that the
+ problem of hashing trees is hard and there are plenty of study on
+ this topic. Therefore, there must be room for improvement here. *)
+ let alpha = 65599
+ let beta = 7
+ let combine x y = x * alpha + y
+ let combine3 x y z = combine x (combine y z)
+ let combine4 x y z t = combine x (combine3 y z t)
+ let combinesmall x y = beta * x + y
+
+end
diff --git a/lib/hashtbl_alt.mli b/lib/hashtbl_alt.mli
new file mode 100644
index 000000000..47255fc9d
--- /dev/null
+++ b/lib/hashtbl_alt.mli
@@ -0,0 +1,38 @@
+(************************************************************************)
+(* v * The Coq Proof Assistant / The Coq Development Team *)
+(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2010 *)
+(* \VV/ **************************************************************)
+(* // * This file is distributed under the terms of the *)
+(* * GNU Lesser General Public License Version 2.1 *)
+(************************************************************************)
+
+(* The following module is a specialized version of [Hashtbl] that is
+ a better space saver. Actually, [Hashcons] instanciates [Hashtbl.t]
+ with [constr] used both as a key and as an image. Thus, in each
+ cell of the internal bucketlist, there are two representations of
+ the same value. In this implementation, there is only one.
+
+ Besides, the responsibility of computing the hash function is now
+ given to the caller, which makes possible the interleaving of the
+ hash key computation and the hash-consing. *)
+
+module type Hashtype = sig
+ type t
+ val equals : t -> t -> bool
+end
+
+module type S = sig
+ type elt
+ (* [may_add_and_get key constr] uses [key] to look for [constr]
+ in the hash table [H]. If [constr] is [H], returns the
+ specific representation that is stored in [H]. Otherwise,
+ [constr] is stored in [H] and will be used as the canonical
+ representation of this value in the future. *)
+ val may_add_and_get : int -> elt -> elt
+ val combine : int -> int -> int
+ val combinesmall : int -> int -> int
+ val combine3 : int -> int -> int -> int
+ val combine4 : int -> int -> int -> int -> int
+end
+
+module Make (E : Hashtype) : S with type elt = E.t
diff --git a/lib/lib.mllib b/lib/lib.mllib
index 84180fc46..635a35396 100644
--- a/lib/lib.mllib
+++ b/lib/lib.mllib
@@ -25,3 +25,4 @@ Option
Dnet
Store
Unionfind
+Hashtbl_alt