aboutsummaryrefslogtreecommitdiffhomepage
path: root/clib/hashset.ml
diff options
context:
space:
mode:
authorGravatar Emilio Jesus Gallego Arias <e+git@x80.org>2017-12-15 18:51:45 +0100
committerGravatar Emilio Jesus Gallego Arias <e+git@x80.org>2017-12-23 19:20:30 +0100
commit5ffa147bd2fe548df3ac9053fe497d0871a5f6df (patch)
treecc62882184c34e33e2995a5a4ff4ebfcbd0defe0 /clib/hashset.ml
parentdea75d74c222c25f6aa6c38506ac7a51b339e9c6 (diff)
[lib] Split auxiliary libraries into Coq-specific and general.
Up to this point the `lib` directory contained two different library archives, `clib.cma` and `lib.cma`, which a rough splitting between Coq-specific libraries and general-purpose ones. We know split the directory in two, as to make the distinction clear: - `clib`: contains libraries that are not Coq specific and implement common data structures and programming patterns. These libraries could be eventually replace with external dependencies and the rest of the code base wouldn't notice much. - `lib`: contains Coq-specific common libraries in widespread use along the codebase, but that are not considered part of other components. Examples are printing, error handling, or flags. In some cases we have coupling due to utility files depending on Coq specific flags, however this commit doesn't modify any files, but only moves them around, further cleanup is welcome, as indeed a few files in `lib` should likely be placed in `clib`. Also note that `Deque` is not used ATM.
Diffstat (limited to 'clib/hashset.ml')
-rw-r--r--clib/hashset.ml229
1 files changed, 229 insertions, 0 deletions
diff --git a/clib/hashset.ml b/clib/hashset.ml
new file mode 100644
index 000000000..7f96627a6
--- /dev/null
+++ b/clib/hashset.ml
@@ -0,0 +1,229 @@
+(************************************************************************)
+(* v * The Coq Proof Assistant / The Coq Development Team *)
+(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2017 *)
+(* \VV/ **************************************************************)
+(* // * This file is distributed under the terms of the *)
+(* * GNU Lesser General Public License Version 2.1 *)
+(************************************************************************)
+
+(** Adapted from Damien Doligez, projet Para, INRIA Rocquencourt,
+ OCaml stdlib. *)
+
+(** The following functor is a specialized version of [Weak.Make].
+ Here, the responsibility of computing the hash function is now
+ given to the caller, which makes possible the interleaving of the
+ hash key computation and the hash-consing. *)
+
+module type EqType = sig
+ type t
+ val eq : t -> t -> bool
+end
+
+type statistics = {
+ num_bindings: int;
+ num_buckets: int;
+ max_bucket_length: int;
+ bucket_histogram: int array
+}
+
+module type S = sig
+ type elt
+ type t
+ val create : int -> t
+ val clear : t -> unit
+ val repr : int -> elt -> t -> elt
+ val stats : t -> statistics
+end
+
+module Make (E : EqType) =
+ struct
+
+ type elt = E.t
+
+ let emptybucket = Weak.create 0
+
+ type t = {
+ mutable table : elt Weak.t array;
+ mutable hashes : int array array;
+ mutable limit : int; (* bucket size limit *)
+ mutable oversize : int; (* number of oversize buckets *)
+ mutable rover : int; (* for internal bookkeeping *)
+ }
+
+ let get_index t h = (h land max_int) mod (Array.length t.table)
+
+ let limit = 7
+ let over_limit = 2
+
+ let create sz =
+ let sz = if sz < 7 then 7 else sz in
+ let sz = if sz > Sys.max_array_length then Sys.max_array_length else sz in
+ {
+ table = Array.make sz emptybucket;
+ hashes = Array.make sz [| |];
+ limit = limit;
+ oversize = 0;
+ rover = 0;
+ }
+
+ let clear t =
+ for i = 0 to Array.length t.table - 1 do
+ t.table.(i) <- emptybucket;
+ t.hashes.(i) <- [| |];
+ done;
+ t.limit <- limit;
+ t.oversize <- 0
+
+ let iter_weak f t =
+ let rec iter_bucket i j b =
+ if i >= Weak.length b then () else
+ match Weak.check b i with
+ | true -> f b t.hashes.(j) i; iter_bucket (i+1) j b
+ | false -> iter_bucket (i+1) j b
+ in
+ for i = 0 to pred (Array.length t.table) do
+ iter_bucket 0 i (Array.unsafe_get t.table i)
+ done
+
+ let rec count_bucket i b accu =
+ if i >= Weak.length b then accu else
+ count_bucket (i+1) b (accu + (if Weak.check b i then 1 else 0))
+
+ let min x y = if x - y < 0 then x else y
+
+ let next_sz n = min (3 * n / 2 + 3) Sys.max_array_length
+ let prev_sz n = ((n - 3) * 2 + 2) / 3
+
+ let test_shrink_bucket t =
+ let bucket = t.table.(t.rover) in
+ let hbucket = t.hashes.(t.rover) in
+ let len = Weak.length bucket in
+ let prev_len = prev_sz len in
+ let live = count_bucket 0 bucket 0 in
+ if live <= prev_len then begin
+ let rec loop i j =
+ if j >= prev_len then begin
+ if Weak.check bucket i then loop (i + 1) j
+ else if Weak.check bucket j then begin
+ Weak.blit bucket j bucket i 1;
+ hbucket.(i) <- hbucket.(j);
+ loop (i + 1) (j - 1);
+ end else loop i (j - 1);
+ end;
+ in
+ loop 0 (Weak.length bucket - 1);
+ if prev_len = 0 then begin
+ t.table.(t.rover) <- emptybucket;
+ t.hashes.(t.rover) <- [| |];
+ end else begin
+ Obj.truncate (Obj.repr bucket) (prev_len + 1);
+ Obj.truncate (Obj.repr hbucket) prev_len;
+ end;
+ if len > t.limit && prev_len <= t.limit then t.oversize <- t.oversize - 1;
+ end;
+ t.rover <- (t.rover + 1) mod (Array.length t.table)
+
+ let rec resize t =
+ let oldlen = Array.length t.table in
+ let newlen = next_sz oldlen in
+ if newlen > oldlen then begin
+ let newt = create newlen in
+ let add_weak ob oh oi =
+ let setter nb ni _ = Weak.blit ob oi nb ni 1 in
+ let h = oh.(oi) in
+ add_aux newt setter None h (get_index newt h);
+ in
+ iter_weak add_weak t;
+ t.table <- newt.table;
+ t.hashes <- newt.hashes;
+ t.limit <- newt.limit;
+ t.oversize <- newt.oversize;
+ t.rover <- t.rover mod Array.length newt.table;
+ end else begin
+ t.limit <- max_int; (* maximum size already reached *)
+ t.oversize <- 0;
+ end
+
+ and add_aux t setter d h index =
+ let bucket = t.table.(index) in
+ let hashes = t.hashes.(index) in
+ let sz = Weak.length bucket in
+ let rec loop i =
+ if i >= sz then begin
+ let newsz = min (3 * sz / 2 + 3) (Sys.max_array_length - 1) in
+ if newsz <= sz then failwith "Weak.Make: hash bucket cannot grow more";
+ let newbucket = Weak.create newsz in
+ let newhashes = Array.make newsz 0 in
+ Weak.blit bucket 0 newbucket 0 sz;
+ Array.blit hashes 0 newhashes 0 sz;
+ setter newbucket sz d;
+ newhashes.(sz) <- h;
+ t.table.(index) <- newbucket;
+ t.hashes.(index) <- newhashes;
+ if sz <= t.limit && newsz > t.limit then begin
+ t.oversize <- t.oversize + 1;
+ for _i = 0 to over_limit do test_shrink_bucket t done;
+ end;
+ if t.oversize > Array.length t.table / over_limit then resize t
+ end else if Weak.check bucket i then begin
+ loop (i + 1)
+ end else begin
+ setter bucket i d;
+ hashes.(i) <- h
+ end
+ in
+ loop 0
+
+ let find_or h t d ifnotfound =
+ let index = get_index t h in
+ let bucket = t.table.(index) in
+ let hashes = t.hashes.(index) in
+ let sz = Weak.length bucket in
+ let rec loop i =
+ if i >= sz then ifnotfound index
+ else if Int.equal h hashes.(i) then begin
+ match Weak.get bucket i with
+ | Some v when E.eq v d -> v
+ | _ -> loop (i + 1)
+ end else loop (i + 1)
+ in
+ loop 0
+
+ let repr h d t =
+ let ifnotfound index = add_aux t Weak.set (Some d) h index; d in
+ find_or h t d ifnotfound
+
+ let stats t =
+ let fold accu bucket = max (count_bucket 0 bucket 0) accu in
+ let max_length = Array.fold_left fold 0 t.table in
+ let histogram = Array.make (max_length + 1) 0 in
+ let iter bucket =
+ let len = count_bucket 0 bucket 0 in
+ histogram.(len) <- succ histogram.(len)
+ in
+ let () = Array.iter iter t.table in
+ let fold (num, len, i) k = (num + k * i, len + k, succ i) in
+ let (num, len, _) = Array.fold_left fold (0, 0, 0) histogram in
+ {
+ num_bindings = num;
+ num_buckets = len;
+ max_bucket_length = Array.length histogram;
+ bucket_histogram = histogram;
+ }
+
+end
+
+module Combine = struct
+ (* These are helper functions to combine the hash keys in a similar
+ way as [Hashtbl.hash] does. The constants [alpha] and [beta] must
+ be prime numbers. There were chosen empirically. Notice that the
+ problem of hashing trees is hard and there are plenty of study on
+ this topic. Therefore, there must be room for improvement here. *)
+ let alpha = 65599
+ let beta = 7
+ let combine x y = x * alpha + y
+ let combine3 x y z = combine x (combine y z)
+ let combine4 x y z t = combine x (combine3 y z t)
+ let combine5 x y z t u = combine x (combine4 y z t u)
+ let combinesmall x y = beta * x + y
+end