From a0cfa4f118023d35b767a999d5a2ac4b082857b4 Mon Sep 17 00:00:00 2001 From: Samuel Mimram Date: Fri, 25 Jul 2008 15:12:53 +0200 Subject: Imported Upstream version 8.2~beta3+dfsg --- lib/util.ml | 540 +++++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 480 insertions(+), 60 deletions(-) (limited to 'lib/util.ml') diff --git a/lib/util.ml b/lib/util.ml index 16d73430..9fa92f94 100644 --- a/lib/util.ml +++ b/lib/util.ml @@ -6,7 +6,7 @@ (* * GNU Lesser General Public License Version 2.1 *) (***********************************************************************) -(* $Id: util.ml 10185 2007-10-06 18:05:13Z herbelin $ *) +(* $Id: util.ml 11083 2008-06-09 22:08:14Z herbelin $ *) open Pp @@ -26,19 +26,27 @@ type loc = Compat.loc let dummy_loc = Compat.dummy_loc let unloc = Compat.unloc let make_loc = Compat.make_loc +let join_loc = Compat.join_loc (* raising located exceptions *) type 'a located = loc * 'a let anomaly_loc (loc,s,strm) = Stdpp.raise_with_loc loc (Anomaly (s,strm)) let user_err_loc (loc,s,strm) = Stdpp.raise_with_loc loc (UserError (s,strm)) let invalid_arg_loc (loc,s) = Stdpp.raise_with_loc loc (Invalid_argument s) -let join_loc = Compat.join_loc + +let located_fold_left f x (_,a) = f x a +let located_iter2 f (_,a) (_,b) = f a b (* Like Exc_located, but specifies the outermost file read, the filename associated to the location of the error, and the error itself. *) exception Error_in_file of string * (bool * string * loc) * exn +(* Mapping under pairs *) + +let on_fst f (a,b) = (f a,b) +let on_snd f (a,b) = (a,f b) + (* Projections from triplets *) let pi1 (a,_,_) = a @@ -47,15 +55,8 @@ let pi3 (_,_,a) = a (* Characters *) -let is_letter c = - (c >= 'a' && c <= 'z') or - (c >= 'A' && c <= 'Z') or - (c >= '\248' && c <= '\255') or - (c >= '\192' && c <= '\214') or - (c >= '\216' && c <= '\246') - +let is_letter c = (c >= 'a' && c <= 'z') or (c >= 'A' && c <= 'Z') let is_digit c = (c >= '0' && c <= '9') - let is_ident_tail c = is_letter c or is_digit c or c = '\'' or c = '_' @@ -99,6 +100,10 @@ let string_string_contains ~where ~what = let plural n s = if n>1 then s^"s" else s +let ordinal n = + let s = match n mod 10 with 1 -> "st" | 2 -> "nd" | 3 -> "rd" | _ -> "th" in + string_of_int n ^ s + (* string parsing *) let parse_loadpath s = @@ -119,9 +124,329 @@ module Stringset = Set.Make(struct type t = string let compare = compare end) module Stringmap = Map.Make(struct type t = string let compare = compare end) -(* Lists *) +type utf8_status = UnicodeLetter | UnicodeIdentPart | UnicodeSymbol + +exception UnsupportedUtf8 + +let classify_unicode unicode = + match unicode land 0x1F000 with + | 0x0 -> + begin match unicode with + (* utf-8 Basic Latin underscore *) + | x when x = 0x005F -> UnicodeLetter + (* utf-8 Basic Latin letters *) + | x when 0x0041 <= x & x <= 0x005A -> UnicodeLetter + | x when 0x0061 <= x & x <= 0x007A -> UnicodeLetter + (* utf-8 Basic Latin digits and quote *) + | x when 0x0030 <= x & x <= 0x0039 or x = 0x0027 -> UnicodeIdentPart + (* utf-8 Basic Latin symbols *) + | x when x <= 0x007F -> UnicodeSymbol + (* utf-8 Latin-1 non breaking space U00A0 *) + | 0x00A0 -> UnicodeLetter + (* utf-8 Latin-1 symbols U00A1-00BF *) + | x when 0x00A0 <= x & x <= 0x00BF -> UnicodeSymbol + (* utf-8 Latin-1 letters U00C0-00D6 *) + | x when 0x00C0 <= x & x <= 0x00D6 -> UnicodeLetter + (* utf-8 Latin-1 symbol U00D7 *) + | 0x00D7 -> UnicodeSymbol + (* utf-8 Latin-1 letters U00D8-00F6 *) + | x when 0x00D8 <= x & x <= 0x00F6 -> UnicodeLetter + (* utf-8 Latin-1 symbol U00F7 *) + | 0x00F7 -> UnicodeSymbol + (* utf-8 Latin-1 letters U00F8-00FF *) + | x when 0x00F8 <= x & x <= 0x00FF -> UnicodeLetter + (* utf-8 Latin Extended A U0100-017F and Latin Extended B U0180-U0241 *) + | x when 0x0100 <= x & x <= 0x0241 -> UnicodeLetter + (* utf-8 Phonetic letters U0250-02AF *) + | x when 0x0250 <= x & x <= 0x02AF -> UnicodeLetter + (* utf-8 what do to with diacritics U0300-U036F ? *) + (* utf-8 Greek letters U0380-03FF *) + | x when 0x0380 <= x & x <= 0x03FF -> UnicodeLetter + (* utf-8 Cyrillic letters U0400-0481 *) + | x when 0x0400 <= x & x <= 0x0481 -> UnicodeLetter + (* utf-8 Cyrillic symbol U0482 *) + | 0x0482 -> UnicodeSymbol + (* utf-8 what do to with diacritics U0483-U0489 \ U0487 ? *) + (* utf-8 Cyrillic letters U048A-U4F9 (Warning: 04CF) *) + | x when 0x048A <= x & x <= 0x04F9 -> UnicodeLetter + (* utf-8 Cyrillic supplement letters U0500-U050F *) + | x when 0x0500 <= x & x <= 0x050F -> UnicodeLetter + (* utf-8 Hebrew letters U05D0-05EA *) + | x when 0x05D0 <= x & x <= 0x05EA -> UnicodeLetter + (* utf-8 Arabic letters U0621-064A *) + | x when 0x0621 <= x & x <= 0x064A -> UnicodeLetter + (* utf-8 Arabic supplement letters U0750-076D *) + | x when 0x0750 <= x & x <= 0x076D -> UnicodeLetter + | _ -> raise UnsupportedUtf8 + end + | 0x1000 -> + begin match unicode with + (* utf-8 Georgian U10A0-10FF (has holes) *) + | x when 0x10A0 <= x & x <= 0x10FF -> UnicodeLetter + (* utf-8 Hangul Jamo U1100-11FF (has holes) *) + | x when 0x1100 <= x & x <= 0x11FF -> UnicodeLetter + (* utf-8 Latin additional letters U1E00-1E9B and U1EA0-1EF9 *) + | x when 0x1E00 <= x & x <= 0x1E9B -> UnicodeLetter + | x when 0x1EA0 <= x & x <= 0x1EF9 -> UnicodeLetter + | _ -> raise UnsupportedUtf8 + end + | 0x2000 -> + begin match unicode with + (* utf-8 general punctuation U2080-2089 *) + (* Hyphens *) + | x when 0x2010 <= x & x <= 0x2011 -> UnicodeLetter + (* Dashes and other symbols *) + | x when 0x2012 <= x & x <= 0x2027 -> UnicodeSymbol + (* Per mille and per ten thousand signs *) + | x when 0x2030 <= x & x <= 0x2031 -> UnicodeSymbol + (* Prime letters *) + | x when 0x2032 <= x & x <= 0x2034 or x = 0x2057 -> UnicodeIdentPart + (* Miscellaneous punctuation *) + | x when 0x2039 <= x & x <= 0x2056 -> UnicodeSymbol + | x when 0x2058 <= x & x <= 0x205E -> UnicodeSymbol + (* Invisible mathematical operators *) + | x when 0x2061 <= x & x <= 0x2063 -> UnicodeSymbol + (* utf-8 superscript U2070-207C *) + | x when 0x2070 <= x & x <= 0x207C -> UnicodeSymbol + (* utf-8 subscript U2080-2089 *) + | x when 0x2080 <= x & x <= 0x2089 -> UnicodeIdentPart + (* utf-8 letter-like U2100-214F *) + | x when 0x2100 <= x & x <= 0x214F -> UnicodeLetter + (* utf-8 number-forms U2153-2183 *) + | x when 0x2153 <= x & x <= 0x2183 -> UnicodeSymbol + (* utf-8 arrows A U2190-21FF *) + (* utf-8 mathematical operators U2200-22FF *) + (* utf-8 miscellaneous technical U2300-23FF *) + | x when 0x2190 <= x & x <= 0x23FF -> UnicodeSymbol + (* utf-8 box drawing U2500-257F has ceiling, etc. *) + (* utf-8 block elements U2580-259F *) + (* utf-8 geom. shapes U25A0-25FF (has triangles, losange, etc) *) + (* utf-8 miscellaneous symbols U2600-26FF *) + | x when 0x2500 <= x & x <= 0x26FF -> UnicodeSymbol + (* utf-8 arrows B U2900-297F *) + | x when 0x2900 <= x & x <= 0x297F -> UnicodeSymbol + (* utf-8 mathematical operators U2A00-2AFF *) + | x when 0x2A00 <= x & x <= 0x2AFF -> UnicodeSymbol + (* utf-8 bold symbols U2768-U2775 *) + | x when 0x2768 <= x & x <= 0x2775 -> UnicodeSymbol + (* utf-8 arrows and brackets U27E0-U27FF *) + | x when 0x27E0 <= x & x <= 0x27FF -> UnicodeSymbol + (* utf-8 brackets, braces and parentheses *) + | x when 0x2980 <= x & x <= 0x299F -> UnicodeSymbol + (* utf-8 miscellaneous including double-plus U29F0-U29FF *) + | x when 0x29F0 <= x & x <= 0x29FF -> UnicodeSymbol + | _ -> raise UnsupportedUtf8 + end + | _ -> + begin match unicode with + (* utf-8 Hiragana U3040-309F and Katakana U30A0-30FF *) + | x when 0x3040 <= x & x <= 0x30FF -> UnicodeLetter + (* utf-8 Unified CJK Ideographs U4E00-9FA5 *) + | x when 0x4E00 <= x & x <= 0x9FA5 -> UnicodeLetter + (* utf-8 Hangul syllables UAC00-D7AF *) + | x when 0xAC00 <= x & x <= 0xD7AF -> UnicodeLetter + (* utf-8 Gothic U10330-1034A *) + | x when 0x10330 <= x & x <= 0x1034A -> UnicodeLetter + (* utf-8 Math Alphanumeric Symbols U1D400-1D7FF (letters) (has holes) *) + | x when 0x1D400 <= x & x <= 0x1D7CB -> UnicodeLetter + (* utf-8 Math Alphanumeric Symbols U1D400-1D7FF (digits) *) + | x when 0x1D7CE <= x & x <= 0x1D7FF -> UnicodeIdentPart + | _ -> raise UnsupportedUtf8 + end -let list_add_set x l = if List.mem x l then l else x::l +exception End_of_input + +let utf8_of_unicode n = + if n < 128 then + String.make 1 (Char.chr n) + else if n < 2048 then + let s = String.make 2 (Char.chr (128 + n mod 64)) in + begin + s.[0] <- Char.chr (192 + n / 64); + s + end + else if n < 65536 then + let s = String.make 3 (Char.chr (128 + n mod 64)) in + begin + s.[1] <- Char.chr (128 + (n / 64) mod 64); + s.[0] <- Char.chr (224 + n / 4096); + s + end + else + let s = String.make 4 (Char.chr (128 + n mod 64)) in + begin + s.[2] <- Char.chr (128 + (n / 64) mod 64); + s.[1] <- Char.chr (128 + (n / 4096) mod 64); + s.[0] <- Char.chr (240 + n / 262144); + s + end + +let next_utf8 s i = + let err () = invalid_arg "utf8" in + let l = String.length s - i in + if l = 0 then raise End_of_input + else let a = Char.code s.[i] in if a <= 0x7F then + 1, a + else if a land 0x40 = 0 or l = 1 then err () + else let b = Char.code s.[i+1] in if b land 0xC0 <> 0x80 then err () + else if a land 0x20 = 0 then + 2, (a land 0x1F) lsl 6 + (b land 0x3F) + else if l = 2 then err () + else let c = Char.code s.[i+2] in if c land 0xC0 <> 0x80 then err () + else if a land 0x10 = 0 then + 3, (a land 0x0F) lsl 12 + (b land 0x3F) lsl 6 + (c land 0x3F) + else if l = 3 then err () + else let d = Char.code s.[i+3] in if d land 0xC0 <> 0x80 then err () + else if a land 0x08 = 0 then + 4, (a land 0x07) lsl 18 + (b land 0x3F) lsl 12 + + (c land 0x3F) lsl 6 + (d land 0x3F) + else err () + +(* Check the well-formedness of an identifier *) + +let check_ident s = + let i = ref 0 in + if s <> ".." then try + let j, n = next_utf8 s 0 in + match classify_unicode n with + | UnicodeLetter -> + i := !i + j; + begin try + while true do + let j, n = next_utf8 s !i in + match classify_unicode n with + | UnicodeLetter | UnicodeIdentPart -> i := !i + j + | _ -> error + ("invalid character "^(String.sub s !i j)^" in identifier "^s) + done + with End_of_input -> () end + | _ -> error (s^": an identifier should start with a letter") + with + | End_of_input -> error "The empty string is not an identifier" + | UnsupportedUtf8 -> error (s^": unsupported character in utf8 sequence") + | Invalid_argument _ -> error (s^": invalid utf8 sequence") + +let lowercase_unicode s unicode = + match unicode land 0x1F000 with + | 0x0 -> + begin match unicode with + (* utf-8 Basic Latin underscore *) + | x when x = 0x005F -> x + (* utf-8 Basic Latin letters *) + | x when 0x0041 <= x & x <= 0x005A -> x + 32 + | x when 0x0061 <= x & x <= 0x007A -> x + (* utf-8 Latin-1 non breaking space U00A0 *) + | 0x00A0 as x -> x + (* utf-8 Latin-1 letters U00C0-00D6 *) + | x when 0x00C0 <= x & x <= 0x00D6 -> x + 32 + (* utf-8 Latin-1 letters U00D8-00F6 *) + | x when 0x00D8 <= x & x <= 0x00DE -> x + 32 + | x when 0x00E0 <= x & x <= 0x00F6 -> x + (* utf-8 Latin-1 letters U00F8-00FF *) + | x when 0x00F8 <= x & x <= 0x00FF -> x + (* utf-8 Latin Extended A U0100-017F and Latin Extended B U0180-U0241 *) + | x when 0x0100 <= x & x <= 0x017F -> + if x mod 2 = 1 then x else x + 1 + | x when 0x0180 <= x & x <= 0x0241 -> + warning ("Unable to decide which lowercase letter to map to "^s); x + (* utf-8 Phonetic letters U0250-02AF *) + | x when 0x0250 <= x & x <= 0x02AF -> x + (* utf-8 what do to with diacritics U0300-U036F ? *) + (* utf-8 Greek letters U0380-03FF *) + | x when 0x0380 <= x & x <= 0x0385 -> x + | 0x0386 -> 0x03AC + | x when 0x0388 <= x & x <= 0x038A -> x + 37 + | 0x038C -> 0x03CC + | x when 0x038E <= x & x <= 0x038F -> x + 63 + | x when 0x0390 <= x & x <= 0x03AB & x <> 0x03A2 -> x + 32 + (* utf-8 Greek lowercase letters U03B0-03CE *) + | x when 0x03AC <= x & x <= 0x03CE -> x + | x when 0x03CF <= x & x <= 0x03FF -> + warning ("Unable to decide which lowercase letter to map to "^s); x + (* utf-8 Cyrillic letters U0400-0481 *) + | x when 0x0400 <= x & x <= 0x040F -> x + 80 + | x when 0x0410 <= x & x <= 0x042F -> x + 32 + | x when 0x0430 <= x & x <= 0x045F -> x + | x when 0x0460 <= x & x <= 0x0481 -> + if x mod 2 = 1 then x else x + 1 + (* utf-8 Cyrillic letters U048A-U4F9 (Warning: 04CF) *) + | x when 0x048A <= x & x <= 0x04F9 & x <> 0x04CF -> + if x mod 2 = 1 then x else x + 1 + (* utf-8 Cyrillic supplement letters U0500-U050F *) + | x when 0x0500 <= x & x <= 0x050F -> + if x mod 2 = 1 then x else x + 1 + (* utf-8 Hebrew letters U05D0-05EA *) + | x when 0x05D0 <= x & x <= 0x05EA -> x + (* utf-8 Arabic letters U0621-064A *) + | x when 0x0621 <= x & x <= 0x064A -> x + (* utf-8 Arabic supplement letters U0750-076D *) + | x when 0x0750 <= x & x <= 0x076D -> x + | _ -> raise UnsupportedUtf8 + end + | 0x1000 -> + begin match unicode with + (* utf-8 Georgian U10A0-10FF (has holes) *) + | x when 0x10A0 <= x & x <= 0x10FF -> x + (* utf-8 Hangul Jamo U1100-11FF (has holes) *) + | x when 0x1100 <= x & x <= 0x11FF -> x + (* utf-8 Latin additional letters U1E00-1E9B and U1EA0-1EF9 *) + | x when 0x1E00 <= x & x <= 0x1E95 -> + if x mod 2 = 1 then x else x + 1 + | x when 0x1E96 <= x & x <= 0x1E9B -> x + | x when 0x1EA0 <= x & x <= 0x1EF9 -> + if x mod 2 = 1 then x else x + 1 + | _ -> raise UnsupportedUtf8 + end + | 0x2000 -> + begin match unicode with + (* utf-8 general punctuation U2080-2089 *) + (* Hyphens *) + | x when 0x2010 <= x & x <= 0x2011 -> x + (* utf-8 letter-like U2100-214F *) + | 0x2102 (* double-struck C *) -> Char.code 'x' + | 0x2115 (* double-struck N *) -> Char.code 'n' + | 0x2119 (* double-struck P *) -> Char.code 'x' + | 0x211A (* double-struck Q *) -> Char.code 'x' + | 0x211D (* double-struck R *) -> Char.code 'r' + | 0x2124 (* double-struck Z *) -> Char.code 'x' + | x when 0x2100 <= x & x <= 0x214F -> + warning ("Unable to decide which lowercase letter to map to "^s); x + | _ -> raise UnsupportedUtf8 + end + | _ -> + begin match unicode with + (* utf-8 Hiragana U3040-309F and Katakana U30A0-30FF *) + | x when 0x3040 <= x & x <= 0x30FF -> x + (* utf-8 Unified CJK Ideographs U4E00-9FA5 *) + | x when 0x4E00 <= x & x <= 0x9FA5 -> x + (* utf-8 Hangul syllables UAC00-D7AF *) + | x when 0xAC00 <= x & x <= 0xD7AF -> x + (* utf-8 Gothic U10330-1034A *) + | x when 0x10330 <= x & x <= 0x1034A -> x + (* utf-8 Math Alphanumeric Symbols U1D400-1D7FF (letters) (has holes) *) + | x when 0x1D6A8 <= x & x <= 0x1D7C9 -> + let a = (x - 0x1D6A8) mod 58 in + if a <= 16 or (18 <= a & a <= 24) + then x + 26 (* all but nabla and theta symbol *) + else x + | x when 0x1D538 <= x & x <= 0x1D56B -> + (* Use ordinary lowercase in both small and capital double-struck *) + (x - 0x1D538) mod 26 + Char.code 'a' + | x when 0x1D468 <= x & x <= 0x1D6A3 -> (* General case *) + if (x - 0x1D400 / 26) mod 2 = 0 then x + 26 else x + | x when 0x1D400 <= x & x <= 0x1D7CB -> (* fallback *) + x + (* utf-8 Math Alphanumeric Symbols U1D400-1D7FF (digits) *) + | x when 0x1D7CE <= x & x <= 0x1D7FF -> x + | _ -> raise UnsupportedUtf8 + end + +let lowercase_first_char_utf8 s = + assert (s <> ""); + let j, n = next_utf8 s 0 in + utf8_of_unicode (lowercase_unicode (String.sub s 0 j) n) + +(* Lists *) let list_intersect l1 l2 = List.filter (fun x -> List.mem x l2) l1 @@ -160,6 +485,11 @@ let list_tabulate f len = in tabrec 0 +let rec list_make n v = + if n = 0 then [] + else if n < 0 then invalid_arg "list_make" + else v::list_make (n-1) v + let list_assign l n e = let rec assrec stk = function | ((h::t), 0) -> List.rev_append stk (e::t) @@ -205,6 +535,14 @@ let list_map3 f l1 l2 l3 = in map (l1,l2,l3) +let list_map4 f l1 l2 l3 l4 = + let rec map = function + | ([], [], [], []) -> [] + | ((h1::t1), (h2::t2), (h3::t3), (h4::t4)) -> let v = f h1 h2 h3 h4 in v::map (t1,t2,t3,t4) + | (_, _, _, _) -> invalid_arg "map4" + in + map (l1,l2,l3,l4) + let list_index x = let rec index_x n = function | y::l -> if x = y then n else index_x (succ n) l @@ -212,6 +550,8 @@ let list_index x = in index_x 1 +let list_index0 x l = list_index x l - 1 + let list_unique_index x = let rec index_x n = function | y::l -> @@ -222,6 +562,13 @@ let list_unique_index x = | [] -> raise Not_found in index_x 1 +let list_fold_right_i f i l = + let rec it_list_f i l a = match l with + | [] -> a + | b::l -> f (i-1) b (it_list_f (i-1) l a) + in + it_list_f (List.length l + i) l + let list_fold_left_i f = let rec it_list_f i a = function | [] -> a @@ -247,6 +594,14 @@ let rec list_fold_right_and_left f l hd = | a::l -> let hd = aux (a::tl) l in f hd a tl in aux [] l +let list_iter3 f l1 l2 l3 = + let rec iter = function + | ([], [], []) -> () + | ((h1::t1), (h2::t2), (h3::t3)) -> f h1 h2 h3; iter (t1,t2,t3) + | (_, _, _) -> invalid_arg "map3" + in + iter (l1,l2,l3) + let list_iter_i f l = list_fold_left_i (fun i _ x -> f i x) 0 () l let list_for_all_i p = @@ -265,6 +620,8 @@ let rec list_remove_first a = function | b::l -> b::list_remove_first a l | [] -> raise Not_found +let list_add_set x l = if List.mem x l then l else x::l + let list_eq_set l1 l2 = let rec aux l1 = function | [] -> l1 = [] @@ -300,14 +657,46 @@ let list_try_find f = try_find_f let list_uniquize l = + let visited = Hashtbl.create 23 in let rec aux acc = function - | [] -> List.rev acc - | h::t -> if List.mem h acc then aux acc t else aux (h::acc) t + | h::t -> if Hashtbl.mem visited h then aux acc t else + begin + Hashtbl.add visited h h; + aux (h::acc) t + end + | [] -> List.rev acc in aux [] l -let rec list_distinct = function - | h::t -> (not (List.mem h t)) && list_distinct t - | _ -> true +let rec list_distinct l = + let visited = Hashtbl.create 23 in + let rec loop = function + | h::t -> + if Hashtbl.mem visited h then false + else + begin + Hashtbl.add visited h h; + loop t + end + | [] -> true + in loop l + +let rec list_merge_uniq cmp l1 l2 = + match l1, l2 with + | [], l2 -> l2 + | l1, [] -> l1 + | h1 :: t1, h2 :: t2 -> + let c = cmp h1 h2 in + if c = 0 + then h1 :: list_merge_uniq cmp t1 t2 + else if c <= 0 + then h1 :: list_merge_uniq cmp t1 l2 + else h2 :: list_merge_uniq cmp l1 t2 + +let rec list_duplicates = function + | [] -> [] + | x::l -> + let l' = list_duplicates l in + if List.mem x l then list_add_set x l' else l' let rec list_filter2 f = function | [], [] as p -> p @@ -316,6 +705,12 @@ let rec list_filter2 f = function if f d l then d::dp', l::lp' else p | _ -> invalid_arg "list_filter2" +let rec list_map_filter f = function + | [] -> [] + | x::l -> + let l' = list_map_filter f l in + match f x with None -> l' | Some y -> y::l' + let list_subset l1 l2 = let t2 = Hashtbl.create 151 in List.iter (fun x -> Hashtbl.add t2 x ()) l2; @@ -338,6 +733,14 @@ let rec list_split3 = function | (x,y,z)::l -> let (rx, ry, rz) = list_split3 l in (x::rx, y::ry, z::rz) +let rec list_insert_in_class f a = function + | [] -> [[a]] + | (b::_ as l)::classes when f a b -> (a::l)::classes + | l::classes -> l :: list_insert_in_class f a classes + +let list_partition_by f l = + List.fold_right (list_insert_in_class f) l [] + let list_firstn n l = let rec aux acc = function | (0, l) -> List.rev acc @@ -363,6 +766,9 @@ let rec list_skipn n l = match n,l with | _, [] -> failwith "list_fromn" | n, _::l -> list_skipn (pred n) l +let rec list_addn n x l = + if n = 0 then l else x :: (list_addn (pred n) x l) + let list_prefix_of prefl l = let rec prefrec = function | (h1::t1, h2::t2) -> h1 = h2 && prefrec (t1,t2) @@ -384,6 +790,7 @@ let list_drop_prefix p l = | None -> l let list_map_append f l = List.flatten (List.map f l) +let list_join_map = list_map_append (* Alias *) let list_map_append2 f l1 l2 = List.flatten (List.map2 f l1 l2) @@ -394,8 +801,6 @@ let list_share_tails l1 l2 = in shr_rev [] (List.rev l1, List.rev l2) -let list_join_map f l = List.flatten (List.map f l) - let rec list_fold_map f e = function | [] -> (e,[]) | h::t -> @@ -419,6 +824,35 @@ let list_fold_map' f l e = let list_map_assoc f = List.map (fun (x,a) -> (x,f a)) +(* Specification: + - =p= is set equality (double inclusion) + - f such that \forall l acc, (f l acc) =p= append (f l []) acc + - let g = fun x -> f x [] in + - union_map f l acc =p= append (flatten (map g l)) acc + *) +let list_union_map f l acc = + List.fold_left + (fun x y -> f y x) + acc + l + +(* A generic cartesian product: for any operator (**), + [list_cartesian (**) [x1;x2] [y1;y2] = [x1**y1; x1**y2; x2**y1; x2**y1]], + and so on if there are more elements in the lists. *) + +let rec list_cartesian op l1 l2 = + list_map_append (fun x -> List.map (op x) l2) l1 + +(* [list_cartesians] is an n-ary cartesian product: it iterates + [list_cartesian] over a list of lists. *) + +let list_cartesians op init ll = + List.fold_right (list_cartesian op) ll [init] + +(* list_combinations [[a;b];[c;d]] gives [[a;c];[a;d];[b;c];[b;d]] *) + +let list_combinations l = list_cartesians (fun x l -> x::l) [] l + (* Arrays *) let array_exists f v = @@ -645,6 +1079,11 @@ let array_map_left_pair f a g b = r, s end +let array_iter2 f v1 v2 = + let n = Array.length v1 in + if Array.length v2 <> n then invalid_arg "array_iter2" + else for i = 0 to n - 1 do f v1.(i) v2.(i) done + let pure_functional = false let array_fold_map' f v e = @@ -659,6 +1098,11 @@ else let v' = Array.map (fun x -> let (y,e) = f x !e' in e' := e; y) v in (v',!e') +let array_fold_map f e v = + let e' = ref e in + let v' = Array.map (fun x -> let (e,y) = f !e' x in e' := e; y) v in + (!e',v') + let array_fold_map2' f v1 v2 e = let e' = ref e in let v' = @@ -677,6 +1121,12 @@ let array_distinct v = with Exit -> false +let array_union_map f a acc = + Array.fold_left + (fun x y -> f y x) + acc + a + (* Matrices *) let matrix_transpose mat = @@ -723,44 +1173,6 @@ let interval n m = in interval_n ([],m) -let in_some x = Some x - -let out_some = function - | Some x -> x - | None -> failwith "out_some" - -let option_map f = function - | None -> None - | Some x -> Some (f x) - -let option_cons a l = match a with - | Some x -> x::l - | None -> l - -let option_fold_left2 f e a b = match (a,b) with - | Some x, Some y -> f e x y - | _ -> e - -let option_fold_left f e a = match a with - | Some x -> f e x - | _ -> e - -let option_fold_right f a e = match a with - | Some x -> f x e - | _ -> e - -let option_compare f a b = match (a,b) with - | None, None -> true - | Some a', Some b' -> f a' b' - | _ -> failwith "option_compare" - -let option_iter f = function - | None -> () - | Some x -> f x - -let option_smartmap f a = match a with - | None -> a - | Some x -> let x' = f x in if x'==x then a else Some x' let map_succeed f = let rec map_f = function @@ -780,15 +1192,21 @@ let pr_semicolon () = str ";" ++ spc () let pr_bar () = str "|" ++ spc () let pr_arg pr x = spc () ++ pr x let pr_opt pr = function None -> mt () | Some x -> pr_arg pr x +let pr_opt_no_spc pr = function None -> mt () | Some x -> pr x -let pr_ord n = - let suff = match n mod 10 with 1 -> "st" | 2 -> "nd" | _ -> "th" in - int n ++ str suff +let nth n = str (ordinal n) let rec prlist elem l = match l with | [] -> mt () | h::t -> Stream.lapp (fun () -> elem h) (prlist elem t) +(* unlike all other functions below, [prlist] works lazily. + if a strict behavior is needed, use [prlist_strict] instead. *) + +let rec prlist_strict elem l = match l with + | [] -> mt () + | h::t -> (elem h)++(prlist_strict elem t) + let rec prlist_with_sep sep elem l = match l with | [] -> mt () | [h] -> elem h @@ -821,6 +1239,8 @@ let prvect_with_sep sep elem v = let n = Array.length v in if n = 0 then mt () else pr (n - 1) +let prvect elem v = prvect_with_sep mt elem v + let surround p = hov 1 (str"(" ++ p ++ str")") (*s Size of ocaml values. *) -- cgit v1.2.3