summaryrefslogtreecommitdiff
path: root/lib/util.ml
diff options
context:
space:
mode:
Diffstat (limited to 'lib/util.ml')
-rw-r--r--lib/util.ml540
1 files changed, 480 insertions, 60 deletions
diff --git a/lib/util.ml b/lib/util.ml
index 16d73430..9fa92f94 100644
--- a/lib/util.ml
+++ b/lib/util.ml
@@ -6,7 +6,7 @@
(* * GNU Lesser General Public License Version 2.1 *)
(***********************************************************************)
-(* $Id: util.ml 10185 2007-10-06 18:05:13Z herbelin $ *)
+(* $Id: util.ml 11083 2008-06-09 22:08:14Z herbelin $ *)
open Pp
@@ -26,19 +26,27 @@ type loc = Compat.loc
let dummy_loc = Compat.dummy_loc
let unloc = Compat.unloc
let make_loc = Compat.make_loc
+let join_loc = Compat.join_loc
(* raising located exceptions *)
type 'a located = loc * 'a
let anomaly_loc (loc,s,strm) = Stdpp.raise_with_loc loc (Anomaly (s,strm))
let user_err_loc (loc,s,strm) = Stdpp.raise_with_loc loc (UserError (s,strm))
let invalid_arg_loc (loc,s) = Stdpp.raise_with_loc loc (Invalid_argument s)
-let join_loc = Compat.join_loc
+
+let located_fold_left f x (_,a) = f x a
+let located_iter2 f (_,a) (_,b) = f a b
(* Like Exc_located, but specifies the outermost file read, the filename
associated to the location of the error, and the error itself. *)
exception Error_in_file of string * (bool * string * loc) * exn
+(* Mapping under pairs *)
+
+let on_fst f (a,b) = (f a,b)
+let on_snd f (a,b) = (a,f b)
+
(* Projections from triplets *)
let pi1 (a,_,_) = a
@@ -47,15 +55,8 @@ let pi3 (_,_,a) = a
(* Characters *)
-let is_letter c =
- (c >= 'a' && c <= 'z') or
- (c >= 'A' && c <= 'Z') or
- (c >= '\248' && c <= '\255') or
- (c >= '\192' && c <= '\214') or
- (c >= '\216' && c <= '\246')
-
+let is_letter c = (c >= 'a' && c <= 'z') or (c >= 'A' && c <= 'Z')
let is_digit c = (c >= '0' && c <= '9')
-
let is_ident_tail c =
is_letter c or is_digit c or c = '\'' or c = '_'
@@ -99,6 +100,10 @@ let string_string_contains ~where ~what =
let plural n s = if n>1 then s^"s" else s
+let ordinal n =
+ let s = match n mod 10 with 1 -> "st" | 2 -> "nd" | 3 -> "rd" | _ -> "th" in
+ string_of_int n ^ s
+
(* string parsing *)
let parse_loadpath s =
@@ -119,9 +124,329 @@ module Stringset = Set.Make(struct type t = string let compare = compare end)
module Stringmap = Map.Make(struct type t = string let compare = compare end)
-(* Lists *)
+type utf8_status = UnicodeLetter | UnicodeIdentPart | UnicodeSymbol
+
+exception UnsupportedUtf8
+
+let classify_unicode unicode =
+ match unicode land 0x1F000 with
+ | 0x0 ->
+ begin match unicode with
+ (* utf-8 Basic Latin underscore *)
+ | x when x = 0x005F -> UnicodeLetter
+ (* utf-8 Basic Latin letters *)
+ | x when 0x0041 <= x & x <= 0x005A -> UnicodeLetter
+ | x when 0x0061 <= x & x <= 0x007A -> UnicodeLetter
+ (* utf-8 Basic Latin digits and quote *)
+ | x when 0x0030 <= x & x <= 0x0039 or x = 0x0027 -> UnicodeIdentPart
+ (* utf-8 Basic Latin symbols *)
+ | x when x <= 0x007F -> UnicodeSymbol
+ (* utf-8 Latin-1 non breaking space U00A0 *)
+ | 0x00A0 -> UnicodeLetter
+ (* utf-8 Latin-1 symbols U00A1-00BF *)
+ | x when 0x00A0 <= x & x <= 0x00BF -> UnicodeSymbol
+ (* utf-8 Latin-1 letters U00C0-00D6 *)
+ | x when 0x00C0 <= x & x <= 0x00D6 -> UnicodeLetter
+ (* utf-8 Latin-1 symbol U00D7 *)
+ | 0x00D7 -> UnicodeSymbol
+ (* utf-8 Latin-1 letters U00D8-00F6 *)
+ | x when 0x00D8 <= x & x <= 0x00F6 -> UnicodeLetter
+ (* utf-8 Latin-1 symbol U00F7 *)
+ | 0x00F7 -> UnicodeSymbol
+ (* utf-8 Latin-1 letters U00F8-00FF *)
+ | x when 0x00F8 <= x & x <= 0x00FF -> UnicodeLetter
+ (* utf-8 Latin Extended A U0100-017F and Latin Extended B U0180-U0241 *)
+ | x when 0x0100 <= x & x <= 0x0241 -> UnicodeLetter
+ (* utf-8 Phonetic letters U0250-02AF *)
+ | x when 0x0250 <= x & x <= 0x02AF -> UnicodeLetter
+ (* utf-8 what do to with diacritics U0300-U036F ? *)
+ (* utf-8 Greek letters U0380-03FF *)
+ | x when 0x0380 <= x & x <= 0x03FF -> UnicodeLetter
+ (* utf-8 Cyrillic letters U0400-0481 *)
+ | x when 0x0400 <= x & x <= 0x0481 -> UnicodeLetter
+ (* utf-8 Cyrillic symbol U0482 *)
+ | 0x0482 -> UnicodeSymbol
+ (* utf-8 what do to with diacritics U0483-U0489 \ U0487 ? *)
+ (* utf-8 Cyrillic letters U048A-U4F9 (Warning: 04CF) *)
+ | x when 0x048A <= x & x <= 0x04F9 -> UnicodeLetter
+ (* utf-8 Cyrillic supplement letters U0500-U050F *)
+ | x when 0x0500 <= x & x <= 0x050F -> UnicodeLetter
+ (* utf-8 Hebrew letters U05D0-05EA *)
+ | x when 0x05D0 <= x & x <= 0x05EA -> UnicodeLetter
+ (* utf-8 Arabic letters U0621-064A *)
+ | x when 0x0621 <= x & x <= 0x064A -> UnicodeLetter
+ (* utf-8 Arabic supplement letters U0750-076D *)
+ | x when 0x0750 <= x & x <= 0x076D -> UnicodeLetter
+ | _ -> raise UnsupportedUtf8
+ end
+ | 0x1000 ->
+ begin match unicode with
+ (* utf-8 Georgian U10A0-10FF (has holes) *)
+ | x when 0x10A0 <= x & x <= 0x10FF -> UnicodeLetter
+ (* utf-8 Hangul Jamo U1100-11FF (has holes) *)
+ | x when 0x1100 <= x & x <= 0x11FF -> UnicodeLetter
+ (* utf-8 Latin additional letters U1E00-1E9B and U1EA0-1EF9 *)
+ | x when 0x1E00 <= x & x <= 0x1E9B -> UnicodeLetter
+ | x when 0x1EA0 <= x & x <= 0x1EF9 -> UnicodeLetter
+ | _ -> raise UnsupportedUtf8
+ end
+ | 0x2000 ->
+ begin match unicode with
+ (* utf-8 general punctuation U2080-2089 *)
+ (* Hyphens *)
+ | x when 0x2010 <= x & x <= 0x2011 -> UnicodeLetter
+ (* Dashes and other symbols *)
+ | x when 0x2012 <= x & x <= 0x2027 -> UnicodeSymbol
+ (* Per mille and per ten thousand signs *)
+ | x when 0x2030 <= x & x <= 0x2031 -> UnicodeSymbol
+ (* Prime letters *)
+ | x when 0x2032 <= x & x <= 0x2034 or x = 0x2057 -> UnicodeIdentPart
+ (* Miscellaneous punctuation *)
+ | x when 0x2039 <= x & x <= 0x2056 -> UnicodeSymbol
+ | x when 0x2058 <= x & x <= 0x205E -> UnicodeSymbol
+ (* Invisible mathematical operators *)
+ | x when 0x2061 <= x & x <= 0x2063 -> UnicodeSymbol
+ (* utf-8 superscript U2070-207C *)
+ | x when 0x2070 <= x & x <= 0x207C -> UnicodeSymbol
+ (* utf-8 subscript U2080-2089 *)
+ | x when 0x2080 <= x & x <= 0x2089 -> UnicodeIdentPart
+ (* utf-8 letter-like U2100-214F *)
+ | x when 0x2100 <= x & x <= 0x214F -> UnicodeLetter
+ (* utf-8 number-forms U2153-2183 *)
+ | x when 0x2153 <= x & x <= 0x2183 -> UnicodeSymbol
+ (* utf-8 arrows A U2190-21FF *)
+ (* utf-8 mathematical operators U2200-22FF *)
+ (* utf-8 miscellaneous technical U2300-23FF *)
+ | x when 0x2190 <= x & x <= 0x23FF -> UnicodeSymbol
+ (* utf-8 box drawing U2500-257F has ceiling, etc. *)
+ (* utf-8 block elements U2580-259F *)
+ (* utf-8 geom. shapes U25A0-25FF (has triangles, losange, etc) *)
+ (* utf-8 miscellaneous symbols U2600-26FF *)
+ | x when 0x2500 <= x & x <= 0x26FF -> UnicodeSymbol
+ (* utf-8 arrows B U2900-297F *)
+ | x when 0x2900 <= x & x <= 0x297F -> UnicodeSymbol
+ (* utf-8 mathematical operators U2A00-2AFF *)
+ | x when 0x2A00 <= x & x <= 0x2AFF -> UnicodeSymbol
+ (* utf-8 bold symbols U2768-U2775 *)
+ | x when 0x2768 <= x & x <= 0x2775 -> UnicodeSymbol
+ (* utf-8 arrows and brackets U27E0-U27FF *)
+ | x when 0x27E0 <= x & x <= 0x27FF -> UnicodeSymbol
+ (* utf-8 brackets, braces and parentheses *)
+ | x when 0x2980 <= x & x <= 0x299F -> UnicodeSymbol
+ (* utf-8 miscellaneous including double-plus U29F0-U29FF *)
+ | x when 0x29F0 <= x & x <= 0x29FF -> UnicodeSymbol
+ | _ -> raise UnsupportedUtf8
+ end
+ | _ ->
+ begin match unicode with
+ (* utf-8 Hiragana U3040-309F and Katakana U30A0-30FF *)
+ | x when 0x3040 <= x & x <= 0x30FF -> UnicodeLetter
+ (* utf-8 Unified CJK Ideographs U4E00-9FA5 *)
+ | x when 0x4E00 <= x & x <= 0x9FA5 -> UnicodeLetter
+ (* utf-8 Hangul syllables UAC00-D7AF *)
+ | x when 0xAC00 <= x & x <= 0xD7AF -> UnicodeLetter
+ (* utf-8 Gothic U10330-1034A *)
+ | x when 0x10330 <= x & x <= 0x1034A -> UnicodeLetter
+ (* utf-8 Math Alphanumeric Symbols U1D400-1D7FF (letters) (has holes) *)
+ | x when 0x1D400 <= x & x <= 0x1D7CB -> UnicodeLetter
+ (* utf-8 Math Alphanumeric Symbols U1D400-1D7FF (digits) *)
+ | x when 0x1D7CE <= x & x <= 0x1D7FF -> UnicodeIdentPart
+ | _ -> raise UnsupportedUtf8
+ end
-let list_add_set x l = if List.mem x l then l else x::l
+exception End_of_input
+
+let utf8_of_unicode n =
+ if n < 128 then
+ String.make 1 (Char.chr n)
+ else if n < 2048 then
+ let s = String.make 2 (Char.chr (128 + n mod 64)) in
+ begin
+ s.[0] <- Char.chr (192 + n / 64);
+ s
+ end
+ else if n < 65536 then
+ let s = String.make 3 (Char.chr (128 + n mod 64)) in
+ begin
+ s.[1] <- Char.chr (128 + (n / 64) mod 64);
+ s.[0] <- Char.chr (224 + n / 4096);
+ s
+ end
+ else
+ let s = String.make 4 (Char.chr (128 + n mod 64)) in
+ begin
+ s.[2] <- Char.chr (128 + (n / 64) mod 64);
+ s.[1] <- Char.chr (128 + (n / 4096) mod 64);
+ s.[0] <- Char.chr (240 + n / 262144);
+ s
+ end
+
+let next_utf8 s i =
+ let err () = invalid_arg "utf8" in
+ let l = String.length s - i in
+ if l = 0 then raise End_of_input
+ else let a = Char.code s.[i] in if a <= 0x7F then
+ 1, a
+ else if a land 0x40 = 0 or l = 1 then err ()
+ else let b = Char.code s.[i+1] in if b land 0xC0 <> 0x80 then err ()
+ else if a land 0x20 = 0 then
+ 2, (a land 0x1F) lsl 6 + (b land 0x3F)
+ else if l = 2 then err ()
+ else let c = Char.code s.[i+2] in if c land 0xC0 <> 0x80 then err ()
+ else if a land 0x10 = 0 then
+ 3, (a land 0x0F) lsl 12 + (b land 0x3F) lsl 6 + (c land 0x3F)
+ else if l = 3 then err ()
+ else let d = Char.code s.[i+3] in if d land 0xC0 <> 0x80 then err ()
+ else if a land 0x08 = 0 then
+ 4, (a land 0x07) lsl 18 + (b land 0x3F) lsl 12 +
+ (c land 0x3F) lsl 6 + (d land 0x3F)
+ else err ()
+
+(* Check the well-formedness of an identifier *)
+
+let check_ident s =
+ let i = ref 0 in
+ if s <> ".." then try
+ let j, n = next_utf8 s 0 in
+ match classify_unicode n with
+ | UnicodeLetter ->
+ i := !i + j;
+ begin try
+ while true do
+ let j, n = next_utf8 s !i in
+ match classify_unicode n with
+ | UnicodeLetter | UnicodeIdentPart -> i := !i + j
+ | _ -> error
+ ("invalid character "^(String.sub s !i j)^" in identifier "^s)
+ done
+ with End_of_input -> () end
+ | _ -> error (s^": an identifier should start with a letter")
+ with
+ | End_of_input -> error "The empty string is not an identifier"
+ | UnsupportedUtf8 -> error (s^": unsupported character in utf8 sequence")
+ | Invalid_argument _ -> error (s^": invalid utf8 sequence")
+
+let lowercase_unicode s unicode =
+ match unicode land 0x1F000 with
+ | 0x0 ->
+ begin match unicode with
+ (* utf-8 Basic Latin underscore *)
+ | x when x = 0x005F -> x
+ (* utf-8 Basic Latin letters *)
+ | x when 0x0041 <= x & x <= 0x005A -> x + 32
+ | x when 0x0061 <= x & x <= 0x007A -> x
+ (* utf-8 Latin-1 non breaking space U00A0 *)
+ | 0x00A0 as x -> x
+ (* utf-8 Latin-1 letters U00C0-00D6 *)
+ | x when 0x00C0 <= x & x <= 0x00D6 -> x + 32
+ (* utf-8 Latin-1 letters U00D8-00F6 *)
+ | x when 0x00D8 <= x & x <= 0x00DE -> x + 32
+ | x when 0x00E0 <= x & x <= 0x00F6 -> x
+ (* utf-8 Latin-1 letters U00F8-00FF *)
+ | x when 0x00F8 <= x & x <= 0x00FF -> x
+ (* utf-8 Latin Extended A U0100-017F and Latin Extended B U0180-U0241 *)
+ | x when 0x0100 <= x & x <= 0x017F ->
+ if x mod 2 = 1 then x else x + 1
+ | x when 0x0180 <= x & x <= 0x0241 ->
+ warning ("Unable to decide which lowercase letter to map to "^s); x
+ (* utf-8 Phonetic letters U0250-02AF *)
+ | x when 0x0250 <= x & x <= 0x02AF -> x
+ (* utf-8 what do to with diacritics U0300-U036F ? *)
+ (* utf-8 Greek letters U0380-03FF *)
+ | x when 0x0380 <= x & x <= 0x0385 -> x
+ | 0x0386 -> 0x03AC
+ | x when 0x0388 <= x & x <= 0x038A -> x + 37
+ | 0x038C -> 0x03CC
+ | x when 0x038E <= x & x <= 0x038F -> x + 63
+ | x when 0x0390 <= x & x <= 0x03AB & x <> 0x03A2 -> x + 32
+ (* utf-8 Greek lowercase letters U03B0-03CE *)
+ | x when 0x03AC <= x & x <= 0x03CE -> x
+ | x when 0x03CF <= x & x <= 0x03FF ->
+ warning ("Unable to decide which lowercase letter to map to "^s); x
+ (* utf-8 Cyrillic letters U0400-0481 *)
+ | x when 0x0400 <= x & x <= 0x040F -> x + 80
+ | x when 0x0410 <= x & x <= 0x042F -> x + 32
+ | x when 0x0430 <= x & x <= 0x045F -> x
+ | x when 0x0460 <= x & x <= 0x0481 ->
+ if x mod 2 = 1 then x else x + 1
+ (* utf-8 Cyrillic letters U048A-U4F9 (Warning: 04CF) *)
+ | x when 0x048A <= x & x <= 0x04F9 & x <> 0x04CF ->
+ if x mod 2 = 1 then x else x + 1
+ (* utf-8 Cyrillic supplement letters U0500-U050F *)
+ | x when 0x0500 <= x & x <= 0x050F ->
+ if x mod 2 = 1 then x else x + 1
+ (* utf-8 Hebrew letters U05D0-05EA *)
+ | x when 0x05D0 <= x & x <= 0x05EA -> x
+ (* utf-8 Arabic letters U0621-064A *)
+ | x when 0x0621 <= x & x <= 0x064A -> x
+ (* utf-8 Arabic supplement letters U0750-076D *)
+ | x when 0x0750 <= x & x <= 0x076D -> x
+ | _ -> raise UnsupportedUtf8
+ end
+ | 0x1000 ->
+ begin match unicode with
+ (* utf-8 Georgian U10A0-10FF (has holes) *)
+ | x when 0x10A0 <= x & x <= 0x10FF -> x
+ (* utf-8 Hangul Jamo U1100-11FF (has holes) *)
+ | x when 0x1100 <= x & x <= 0x11FF -> x
+ (* utf-8 Latin additional letters U1E00-1E9B and U1EA0-1EF9 *)
+ | x when 0x1E00 <= x & x <= 0x1E95 ->
+ if x mod 2 = 1 then x else x + 1
+ | x when 0x1E96 <= x & x <= 0x1E9B -> x
+ | x when 0x1EA0 <= x & x <= 0x1EF9 ->
+ if x mod 2 = 1 then x else x + 1
+ | _ -> raise UnsupportedUtf8
+ end
+ | 0x2000 ->
+ begin match unicode with
+ (* utf-8 general punctuation U2080-2089 *)
+ (* Hyphens *)
+ | x when 0x2010 <= x & x <= 0x2011 -> x
+ (* utf-8 letter-like U2100-214F *)
+ | 0x2102 (* double-struck C *) -> Char.code 'x'
+ | 0x2115 (* double-struck N *) -> Char.code 'n'
+ | 0x2119 (* double-struck P *) -> Char.code 'x'
+ | 0x211A (* double-struck Q *) -> Char.code 'x'
+ | 0x211D (* double-struck R *) -> Char.code 'r'
+ | 0x2124 (* double-struck Z *) -> Char.code 'x'
+ | x when 0x2100 <= x & x <= 0x214F ->
+ warning ("Unable to decide which lowercase letter to map to "^s); x
+ | _ -> raise UnsupportedUtf8
+ end
+ | _ ->
+ begin match unicode with
+ (* utf-8 Hiragana U3040-309F and Katakana U30A0-30FF *)
+ | x when 0x3040 <= x & x <= 0x30FF -> x
+ (* utf-8 Unified CJK Ideographs U4E00-9FA5 *)
+ | x when 0x4E00 <= x & x <= 0x9FA5 -> x
+ (* utf-8 Hangul syllables UAC00-D7AF *)
+ | x when 0xAC00 <= x & x <= 0xD7AF -> x
+ (* utf-8 Gothic U10330-1034A *)
+ | x when 0x10330 <= x & x <= 0x1034A -> x
+ (* utf-8 Math Alphanumeric Symbols U1D400-1D7FF (letters) (has holes) *)
+ | x when 0x1D6A8 <= x & x <= 0x1D7C9 ->
+ let a = (x - 0x1D6A8) mod 58 in
+ if a <= 16 or (18 <= a & a <= 24)
+ then x + 26 (* all but nabla and theta symbol *)
+ else x
+ | x when 0x1D538 <= x & x <= 0x1D56B ->
+ (* Use ordinary lowercase in both small and capital double-struck *)
+ (x - 0x1D538) mod 26 + Char.code 'a'
+ | x when 0x1D468 <= x & x <= 0x1D6A3 -> (* General case *)
+ if (x - 0x1D400 / 26) mod 2 = 0 then x + 26 else x
+ | x when 0x1D400 <= x & x <= 0x1D7CB -> (* fallback *)
+ x
+ (* utf-8 Math Alphanumeric Symbols U1D400-1D7FF (digits) *)
+ | x when 0x1D7CE <= x & x <= 0x1D7FF -> x
+ | _ -> raise UnsupportedUtf8
+ end
+
+let lowercase_first_char_utf8 s =
+ assert (s <> "");
+ let j, n = next_utf8 s 0 in
+ utf8_of_unicode (lowercase_unicode (String.sub s 0 j) n)
+
+(* Lists *)
let list_intersect l1 l2 =
List.filter (fun x -> List.mem x l2) l1
@@ -160,6 +485,11 @@ let list_tabulate f len =
in
tabrec 0
+let rec list_make n v =
+ if n = 0 then []
+ else if n < 0 then invalid_arg "list_make"
+ else v::list_make (n-1) v
+
let list_assign l n e =
let rec assrec stk = function
| ((h::t), 0) -> List.rev_append stk (e::t)
@@ -205,6 +535,14 @@ let list_map3 f l1 l2 l3 =
in
map (l1,l2,l3)
+let list_map4 f l1 l2 l3 l4 =
+ let rec map = function
+ | ([], [], [], []) -> []
+ | ((h1::t1), (h2::t2), (h3::t3), (h4::t4)) -> let v = f h1 h2 h3 h4 in v::map (t1,t2,t3,t4)
+ | (_, _, _, _) -> invalid_arg "map4"
+ in
+ map (l1,l2,l3,l4)
+
let list_index x =
let rec index_x n = function
| y::l -> if x = y then n else index_x (succ n) l
@@ -212,6 +550,8 @@ let list_index x =
in
index_x 1
+let list_index0 x l = list_index x l - 1
+
let list_unique_index x =
let rec index_x n = function
| y::l ->
@@ -222,6 +562,13 @@ let list_unique_index x =
| [] -> raise Not_found
in index_x 1
+let list_fold_right_i f i l =
+ let rec it_list_f i l a = match l with
+ | [] -> a
+ | b::l -> f (i-1) b (it_list_f (i-1) l a)
+ in
+ it_list_f (List.length l + i) l
+
let list_fold_left_i f =
let rec it_list_f i a = function
| [] -> a
@@ -247,6 +594,14 @@ let rec list_fold_right_and_left f l hd =
| a::l -> let hd = aux (a::tl) l in f hd a tl
in aux [] l
+let list_iter3 f l1 l2 l3 =
+ let rec iter = function
+ | ([], [], []) -> ()
+ | ((h1::t1), (h2::t2), (h3::t3)) -> f h1 h2 h3; iter (t1,t2,t3)
+ | (_, _, _) -> invalid_arg "map3"
+ in
+ iter (l1,l2,l3)
+
let list_iter_i f l = list_fold_left_i (fun i _ x -> f i x) 0 () l
let list_for_all_i p =
@@ -265,6 +620,8 @@ let rec list_remove_first a = function
| b::l -> b::list_remove_first a l
| [] -> raise Not_found
+let list_add_set x l = if List.mem x l then l else x::l
+
let list_eq_set l1 l2 =
let rec aux l1 = function
| [] -> l1 = []
@@ -300,14 +657,46 @@ let list_try_find f =
try_find_f
let list_uniquize l =
+ let visited = Hashtbl.create 23 in
let rec aux acc = function
- | [] -> List.rev acc
- | h::t -> if List.mem h acc then aux acc t else aux (h::acc) t
+ | h::t -> if Hashtbl.mem visited h then aux acc t else
+ begin
+ Hashtbl.add visited h h;
+ aux (h::acc) t
+ end
+ | [] -> List.rev acc
in aux [] l
-let rec list_distinct = function
- | h::t -> (not (List.mem h t)) && list_distinct t
- | _ -> true
+let rec list_distinct l =
+ let visited = Hashtbl.create 23 in
+ let rec loop = function
+ | h::t ->
+ if Hashtbl.mem visited h then false
+ else
+ begin
+ Hashtbl.add visited h h;
+ loop t
+ end
+ | [] -> true
+ in loop l
+
+let rec list_merge_uniq cmp l1 l2 =
+ match l1, l2 with
+ | [], l2 -> l2
+ | l1, [] -> l1
+ | h1 :: t1, h2 :: t2 ->
+ let c = cmp h1 h2 in
+ if c = 0
+ then h1 :: list_merge_uniq cmp t1 t2
+ else if c <= 0
+ then h1 :: list_merge_uniq cmp t1 l2
+ else h2 :: list_merge_uniq cmp l1 t2
+
+let rec list_duplicates = function
+ | [] -> []
+ | x::l ->
+ let l' = list_duplicates l in
+ if List.mem x l then list_add_set x l' else l'
let rec list_filter2 f = function
| [], [] as p -> p
@@ -316,6 +705,12 @@ let rec list_filter2 f = function
if f d l then d::dp', l::lp' else p
| _ -> invalid_arg "list_filter2"
+let rec list_map_filter f = function
+ | [] -> []
+ | x::l ->
+ let l' = list_map_filter f l in
+ match f x with None -> l' | Some y -> y::l'
+
let list_subset l1 l2 =
let t2 = Hashtbl.create 151 in
List.iter (fun x -> Hashtbl.add t2 x ()) l2;
@@ -338,6 +733,14 @@ let rec list_split3 = function
| (x,y,z)::l ->
let (rx, ry, rz) = list_split3 l in (x::rx, y::ry, z::rz)
+let rec list_insert_in_class f a = function
+ | [] -> [[a]]
+ | (b::_ as l)::classes when f a b -> (a::l)::classes
+ | l::classes -> l :: list_insert_in_class f a classes
+
+let list_partition_by f l =
+ List.fold_right (list_insert_in_class f) l []
+
let list_firstn n l =
let rec aux acc = function
| (0, l) -> List.rev acc
@@ -363,6 +766,9 @@ let rec list_skipn n l = match n,l with
| _, [] -> failwith "list_fromn"
| n, _::l -> list_skipn (pred n) l
+let rec list_addn n x l =
+ if n = 0 then l else x :: (list_addn (pred n) x l)
+
let list_prefix_of prefl l =
let rec prefrec = function
| (h1::t1, h2::t2) -> h1 = h2 && prefrec (t1,t2)
@@ -384,6 +790,7 @@ let list_drop_prefix p l =
| None -> l
let list_map_append f l = List.flatten (List.map f l)
+let list_join_map = list_map_append (* Alias *)
let list_map_append2 f l1 l2 = List.flatten (List.map2 f l1 l2)
@@ -394,8 +801,6 @@ let list_share_tails l1 l2 =
in
shr_rev [] (List.rev l1, List.rev l2)
-let list_join_map f l = List.flatten (List.map f l)
-
let rec list_fold_map f e = function
| [] -> (e,[])
| h::t ->
@@ -419,6 +824,35 @@ let list_fold_map' f l e =
let list_map_assoc f = List.map (fun (x,a) -> (x,f a))
+(* Specification:
+ - =p= is set equality (double inclusion)
+ - f such that \forall l acc, (f l acc) =p= append (f l []) acc
+ - let g = fun x -> f x [] in
+ - union_map f l acc =p= append (flatten (map g l)) acc
+ *)
+let list_union_map f l acc =
+ List.fold_left
+ (fun x y -> f y x)
+ acc
+ l
+
+(* A generic cartesian product: for any operator (**),
+ [list_cartesian (**) [x1;x2] [y1;y2] = [x1**y1; x1**y2; x2**y1; x2**y1]],
+ and so on if there are more elements in the lists. *)
+
+let rec list_cartesian op l1 l2 =
+ list_map_append (fun x -> List.map (op x) l2) l1
+
+(* [list_cartesians] is an n-ary cartesian product: it iterates
+ [list_cartesian] over a list of lists. *)
+
+let list_cartesians op init ll =
+ List.fold_right (list_cartesian op) ll [init]
+
+(* list_combinations [[a;b];[c;d]] gives [[a;c];[a;d];[b;c];[b;d]] *)
+
+let list_combinations l = list_cartesians (fun x l -> x::l) [] l
+
(* Arrays *)
let array_exists f v =
@@ -645,6 +1079,11 @@ let array_map_left_pair f a g b =
r, s
end
+let array_iter2 f v1 v2 =
+ let n = Array.length v1 in
+ if Array.length v2 <> n then invalid_arg "array_iter2"
+ else for i = 0 to n - 1 do f v1.(i) v2.(i) done
+
let pure_functional = false
let array_fold_map' f v e =
@@ -659,6 +1098,11 @@ else
let v' = Array.map (fun x -> let (y,e) = f x !e' in e' := e; y) v in
(v',!e')
+let array_fold_map f e v =
+ let e' = ref e in
+ let v' = Array.map (fun x -> let (e,y) = f !e' x in e' := e; y) v in
+ (!e',v')
+
let array_fold_map2' f v1 v2 e =
let e' = ref e in
let v' =
@@ -677,6 +1121,12 @@ let array_distinct v =
with Exit ->
false
+let array_union_map f a acc =
+ Array.fold_left
+ (fun x y -> f y x)
+ acc
+ a
+
(* Matrices *)
let matrix_transpose mat =
@@ -723,44 +1173,6 @@ let interval n m =
in
interval_n ([],m)
-let in_some x = Some x
-
-let out_some = function
- | Some x -> x
- | None -> failwith "out_some"
-
-let option_map f = function
- | None -> None
- | Some x -> Some (f x)
-
-let option_cons a l = match a with
- | Some x -> x::l
- | None -> l
-
-let option_fold_left2 f e a b = match (a,b) with
- | Some x, Some y -> f e x y
- | _ -> e
-
-let option_fold_left f e a = match a with
- | Some x -> f e x
- | _ -> e
-
-let option_fold_right f a e = match a with
- | Some x -> f x e
- | _ -> e
-
-let option_compare f a b = match (a,b) with
- | None, None -> true
- | Some a', Some b' -> f a' b'
- | _ -> failwith "option_compare"
-
-let option_iter f = function
- | None -> ()
- | Some x -> f x
-
-let option_smartmap f a = match a with
- | None -> a
- | Some x -> let x' = f x in if x'==x then a else Some x'
let map_succeed f =
let rec map_f = function
@@ -780,15 +1192,21 @@ let pr_semicolon () = str ";" ++ spc ()
let pr_bar () = str "|" ++ spc ()
let pr_arg pr x = spc () ++ pr x
let pr_opt pr = function None -> mt () | Some x -> pr_arg pr x
+let pr_opt_no_spc pr = function None -> mt () | Some x -> pr x
-let pr_ord n =
- let suff = match n mod 10 with 1 -> "st" | 2 -> "nd" | _ -> "th" in
- int n ++ str suff
+let nth n = str (ordinal n)
let rec prlist elem l = match l with
| [] -> mt ()
| h::t -> Stream.lapp (fun () -> elem h) (prlist elem t)
+(* unlike all other functions below, [prlist] works lazily.
+ if a strict behavior is needed, use [prlist_strict] instead. *)
+
+let rec prlist_strict elem l = match l with
+ | [] -> mt ()
+ | h::t -> (elem h)++(prlist_strict elem t)
+
let rec prlist_with_sep sep elem l = match l with
| [] -> mt ()
| [h] -> elem h
@@ -821,6 +1239,8 @@ let prvect_with_sep sep elem v =
let n = Array.length v in
if n = 0 then mt () else pr (n - 1)
+let prvect elem v = prvect_with_sep mt elem v
+
let surround p = hov 1 (str"(" ++ p ++ str")")
(*s Size of ocaml values. *)