(************************************************************************) (* v * The Coq Proof Assistant / The Coq Development Team *) (* e::l let magic = 1249 let merge_profile filename (curr_table, curr_outside, curr_total as new_data) = let (old_table, old_outside, old_total) = try let c = open_in filename in if input_binary_int c <> magic then Printf.printf "Incompatible recording file: %s\n" filename; let old_data = input_value c in close_in c; old_data with Sys_error msg -> (Printf.printf "Unable to open %s: %s\n" filename msg; new_data) in let updated_data = let updated_table = List.fold_right ajoute_to_list curr_table old_table in ajoute curr_outside old_outside; ajoute curr_total old_total; (updated_table, old_outside, old_total) in begin (try let c = open_out_gen [Open_creat;Open_wronly;Open_trunc;Open_binary] 0o644 filename in output_binary_int c magic; output_value c updated_data; close_out c with Sys_error _ -> Printf.printf "Unable to create recording file"); updated_data end (************************************************) (* Compute a rough estimation of time overheads *) (* Time and space are not measured in the same way *) (* Byte allocation is an exact number and for long runs, the total number of allocated bytes may exceed the maximum integer capacity (2^31 on 32-bits architectures); therefore, allocation is measured by small steps, total allocations are computed by adding elementary measures and carries are controled from step to step *) (* Unix measure of time is approximative and shoitt delays are often unperceivable; therefore, total times are measured in one (big) step to avoid rounding errors and to get the best possible approximation *) (* ---------- start profile for f1 overheadA| ... ---------- [1w1] 1st call to get_time for f1 overheadB| ... ---------- start f1 real 1 | ... ---------- start profile for 1st call to f2 inside f1 overheadA| ... ---------- [2w1] 1st call to get_time for 1st f2 overheadB| ... ---------- start 1st f2 real 2 | ... ---------- end 1st f2 overheadC| ... ---------- [2w1] 2nd call to get_time for 1st f2 overheadD| ... ---------- end profile for 1st f2 real 1 | ... ---------- start profile for 2nd call to f2 inside f1 overheadA| ... ---------- [2'w1] 1st call to get_time for 2nd f2 overheadB| ... ---------- start 2nd f2 real 2' | ... ---------- end 2nd f2 overheadC| ... ---------- [2'w2] 2nd call to get_time for 2nd f2 overheadD| ... ---------- end profile for f2 real 1 | ... ---------- end f1 overheadC| ... ---------- [1w1'] 2nd call to get_time for f1 overheadD| ... ---------- end profile for f1 When profiling f2, overheadB + overheadC should be subtracted from measure and overheadA + overheadB + overheadC + overheadD should be subtracted from the amount for f1 Then the relevant overheads are : "overheadB + overheadC" to be subtracted to the measure of f as many time as f is called and "overheadA + overheadB + overheadC + overheadD" to be subtracted to the measure of f as many time as f calls a profiled function (itself included) *) let dummy_last_alloc = ref 0.0 let dummy_spent_alloc () = let now = get_alloc () in let before = !last_alloc in last_alloc := now; now -. before let dummy_f x = x let dummy_stack = ref [create_record ()] let dummy_ov = 0 let loops = 10000 let time_overhead_A_D () = let e = create_record () in let before = get_time () in for i=1 to loops do (* This is a copy of profile1 for overhead estimation *) let dw = dummy_spent_alloc () in match !dummy_stack with [] -> assert false | p::_ -> ajoute_ownalloc p dw; ajoute_totalloc p dw; e.owncount <- e.owncount + 1; if not (p==e) then stack := e::!stack; let totalloc0 = e.totalloc in let intcount0 = e.intcount in let dt = get_time () - 1 in e.tottime <- dt + dummy_ov; e.owntime <- e.owntime + e.tottime; ajoute_ownalloc p dw; ajoute_totalloc p dw; p.owntime <- p.owntime - e.tottime; ajoute_totalloc p (e.totalloc-.totalloc0); p.intcount <- p.intcount + e.intcount - intcount0 + 1; p.immcount <- p.immcount + 1; if not (p==e) then (match !dummy_stack with [] -> assert false | _::s -> stack := s); dummy_last_alloc := get_alloc () done; let after = get_time () in let beforeloop = get_time () in for i=1 to loops do () done; let afterloop = get_time () in float_of_int ((after - before) - (afterloop - beforeloop)) /. float_of_int loops let time_overhead_B_C () = let dummy_x = 0 in let before = get_time () in for i=1 to loops do try dummy_last_alloc := get_alloc (); let _r = dummy_f dummy_x in let _dw = dummy_spent_alloc () in let _dt = get_time () in () with _ -> assert false done; let after = get_time () in let beforeloop = get_time () in for i=1 to loops do () done; let afterloop = get_time () in float_of_int ((after - before) - (afterloop - beforeloop)) /. float_of_int loops let compute_alloc lo = lo /. (float_of_int word_length) (************************************************) (* End a profiling session and print the result *) let format_profile (table, outside, total) = print_newline (); Printf.printf "%-23s %9s %9s %10s %10s %10s\n" "Function name" "Own time" "Tot. time" "Own alloc" "Tot. alloc" "Calls "; let l = Sort.list (fun (_,{tottime=p}) (_,{tottime=p'}) -> p > p') table in List.iter (fun (name,e) -> Printf.printf "%-23s %9.2f %9.2f %10.0f %10.0f %6d %6d\n" name (float_of_time e.owntime) (float_of_time e.tottime) (compute_alloc e.ownalloc) (compute_alloc e.totalloc) e.owncount e.intcount) l; Printf.printf "%-23s %9.2f %9.2f %10.0f %10.0f %6d\n" "others" (float_of_time outside.owntime) (float_of_time outside.tottime) (compute_alloc outside.ownalloc) (compute_alloc outside.totalloc) outside.intcount; (* Here, own contains overhead time/alloc *) Printf.printf "%-23s %9.2f %9.2f %10.0f %10.0f\n" "Est. overhead/total" (float_of_time total.owntime) (float_of_time total.tottime) (compute_alloc total.ownalloc) (compute_alloc total.totalloc); Printf.printf "Time in seconds and allocation in words (1 word = %d bytes)\n" word_length let recording_file = ref "" let set_recording s = recording_file := s let adjust_time ov_bc ov_ad e = let bc_imm = float_of_int e.owncount *. ov_bc in let ad_imm = float_of_int e.immcount *. ov_ad in let abcd_all = float_of_int e.intcount *. (ov_ad +. ov_bc) in {e with tottime = e.tottime - int_of_float (abcd_all +. bc_imm); owntime = e.owntime - int_of_float (ad_imm +. bc_imm) } let close_profile print = let dw = spent_alloc () in let t = get_time () in match !stack with | [outside] -> outside.tottime <- outside.tottime + t; outside.owntime <- outside.owntime + t; ajoute_ownalloc outside dw; ajoute_totalloc outside dw; if !prof_table <> [] then begin let ov_bc = time_overhead_B_C () (* B+C overhead *) in let ov_ad = time_overhead_A_D () (* A+D overhead *) in let adjust (n,e) = (n, adjust_time ov_bc ov_ad e) in let adjtable = List.map adjust !prof_table in let adjoutside = adjust_time ov_bc ov_ad outside in let totalloc = !last_alloc -. !init_alloc in let total = create_record () in total.tottime <- outside.tottime; total.totalloc <- totalloc; (* We compute estimations of overhead, put into "own" fields *) total.owntime <- outside.tottime - adjoutside.tottime; total.ownalloc <- totalloc -. outside.totalloc; let current_data = (adjtable, adjoutside, total) in let updated_data = match !recording_file with | "" -> current_data | name -> merge_profile !recording_file current_data in if print then format_profile updated_data; init_profile () end | _ -> failwith "Inconsistency" let append_profile () = close_profile false let print_profile () = close_profile true let declare_profile name = if name = "___outside___" or name = "___total___" then failwith ("Error: "^name^" is a reserved keyword"); let e = create_record () in prof_table := (name,e)::!prof_table; e (* Default initialisation, may be overriden *) let _ = init_profile () (******************************) (* Entry points for profiling *) let profile1 e f a = let dw = spent_alloc () in match !stack with [] -> assert false | p::_ -> (* We add spent alloc since last measure to current caller own/total alloc *) ajoute_ownalloc p dw; ajoute_totalloc p dw; e.owncount <- e.owncount + 1; if not (p==e) then stack := e::!stack; let totalloc0 = e.totalloc in let intcount0 = e.intcount in let t = get_time () in try last_alloc := get_alloc (); let r = f a in let dw = spent_alloc () in let dt = get_time () - t in e.tottime <- e.tottime + dt; e.owntime <- e.owntime + dt; ajoute_ownalloc e dw; ajoute_totalloc e dw; p.owntime <- p.owntime - dt; ajoute_totalloc p (e.totalloc -. totalloc0); p.intcount <- p.intcount + e.intcount - intcount0 + 1; p.immcount <- p.immcount + 1; if not (p==e) then (match !stack with [] -> assert false | _::s -> stack := s); last_alloc := get_alloc (); r with exn -> let dw = spent_alloc () in let dt = get_time () - t in e.tottime <- e.tottime + dt; e.owntime <- e.owntime + dt; ajoute_ownalloc e dw; ajoute_totalloc e dw; p.owntime <- p.owntime - dt; ajoute_totalloc p (e.totalloc -. totalloc0); p.intcount <- p.intcount + e.intcount - intcount0 + 1; p.immcount <- p.immcount + 1; if not (p==e) then (match !stack with [] -> assert false | _::s -> stack := s); last_alloc := get_alloc (); raise exn let profile2 e f a b = let dw = spent_alloc () in match !stack with [] -> assert false | p::_ -> (* We add spent alloc since last measure to current caller own/total alloc *) ajoute_ownalloc p dw; ajoute_totalloc p dw; e.owncount <- e.owncount + 1; if not (p==e) then stack := e::!stack; let totalloc0 = e.totalloc in let intcount0 = e.intcount in let t = get_time () in try last_alloc := get_alloc (); let r = f a b in let dw = spent_alloc () in let dt = get_time () - t in e.tottime <- e.tottime + dt; e.owntime <- e.owntime + dt; ajoute_ownalloc e dw; ajoute_totalloc e dw; p.owntime <- p.owntime - dt; ajoute_totalloc p (e.totalloc -. totalloc0); p.intcount <- p.intcount + e.intcount - intcount0 + 1; p.immcount <- p.immcount + 1; if not (p==e) then (match !stack with [] -> assert false | _::s -> stack := s); last_alloc := get_alloc (); r with exn -> let dw = spent_alloc () in let dt = get_time () - t in e.tottime <- e.tottime + dt; e.owntime <- e.owntime + dt; ajoute_ownalloc e dw; ajoute_totalloc e dw; p.owntime <- p.owntime - dt; ajoute_totalloc p (e.totalloc -. totalloc0); p.intcount <- p.intcount + e.intcount - intcount0 + 1; p.immcount <- p.immcount + 1; if not (p==e) then (match !stack with [] -> assert false | _::s -> stack := s); last_alloc := get_alloc (); raise exn let profile3 e f a b c = let dw = spent_alloc () in match !stack with [] -> assert false | p::_ -> (* We add spent alloc since last measure to current caller own/total alloc *) ajoute_ownalloc p dw; ajoute_totalloc p dw; e.owncount <- e.owncount + 1; if not (p==e) then stack := e::!stack; let totalloc0 = e.totalloc in let intcount0 = e.intcount in let t = get_time () in try last_alloc := get_alloc (); let r = f a b c in let dw = spent_alloc () in let dt = get_time () - t in e.tottime <- e.tottime + dt; e.owntime <- e.owntime + dt; ajoute_ownalloc e dw; ajoute_totalloc e dw; p.owntime <- p.owntime - dt; ajoute_totalloc p (e.totalloc -. totalloc0); p.intcount <- p.intcount + e.intcount - intcount0 + 1; p.immcount <- p.immcount + 1; if not (p==e) then (match !stack with [] -> assert false | _::s -> stack := s); last_alloc := get_alloc (); r with exn -> let dw = spent_alloc () in let dt = get_time () - t in e.tottime <- e.tottime + dt; e.owntime <- e.owntime + dt; ajoute_ownalloc e dw; ajoute_totalloc e dw; p.owntime <- p.owntime - dt; ajoute_totalloc p (e.totalloc -. totalloc0); p.intcount <- p.intcount + e.intcount - intcount0 + 1; p.immcount <- p.immcount + 1; if not (p==e) then (match !stack with [] -> assert false | _::s -> stack := s); last_alloc := get_alloc (); raise exn let profile4 e f a b c d = let dw = spent_alloc () in match !stack with [] -> assert false | p::_ -> (* We add spent alloc since last measure to current caller own/total alloc *) ajoute_ownalloc p dw; ajoute_totalloc p dw; e.owncount <- e.owncount + 1; if not (p==e) then stack := e::!stack; let totalloc0 = e.totalloc in let intcount0 = e.intcount in let t = get_time () in try last_alloc := get_alloc (); let r = f a b c d in let dw = spent_alloc () in let dt = get_time () - t in e.tottime <- e.tottime + dt; e.owntime <- e.owntime + dt; ajoute_ownalloc e dw; ajoute_totalloc e dw; p.owntime <- p.owntime - dt; ajoute_totalloc p (e.totalloc -. totalloc0); p.intcount <- p.intcount + e.intcount - intcount0 + 1; p.immcount <- p.immcount + 1; if not (p==e) then (match !stack with [] -> assert false | _::s -> stack := s); last_alloc := get_alloc (); r with exn -> let dw = spent_alloc () in let dt = get_time () - t in e.tottime <- e.tottime + dt; e.owntime <- e.owntime + dt; ajoute_ownalloc e dw; ajoute_totalloc e dw; p.owntime <- p.owntime - dt; ajoute_totalloc p (e.totalloc -. totalloc0); p.intcount <- p.intcount + e.intcount - intcount0 + 1; p.immcount <- p.immcount + 1; if not (p==e) then (match !stack with [] -> assert false | _::s -> stack := s); last_alloc := get_alloc (); raise exn let profile5 e f a b c d g = let dw = spent_alloc () in match !stack with [] -> assert false | p::_ -> (* We add spent alloc since last measure to current caller own/total alloc *) ajoute_ownalloc p dw; ajoute_totalloc p dw; e.owncount <- e.owncount + 1; if not (p==e) then stack := e::!stack; let totalloc0 = e.totalloc in let intcount0 = e.intcount in let t = get_time () in try last_alloc := get_alloc (); let r = f a b c d g in let dw = spent_alloc () in let dt = get_time () - t in e.tottime <- e.tottime + dt; e.owntime <- e.owntime + dt; ajoute_ownalloc e dw; ajoute_totalloc e dw; p.owntime <- p.owntime - dt; ajoute_totalloc p (e.totalloc -. totalloc0); p.intcount <- p.intcount + e.intcount - intcount0 + 1; p.immcount <- p.immcount + 1; if not (p==e) then (match !stack with [] -> assert false | _::s -> stack := s); last_alloc := get_alloc (); r with exn -> let dw = spent_alloc () in let dt = get_time () - t in e.tottime <- e.tottime + dt; e.owntime <- e.owntime + dt; ajoute_ownalloc e dw; ajoute_totalloc e dw; p.owntime <- p.owntime - dt; ajoute_totalloc p (e.totalloc -. totalloc0); p.intcount <- p.intcount + e.intcount - intcount0 + 1; p.immcount <- p.immcount + 1; if not (p==e) then (match !stack with [] -> assert false | _::s -> stack := s); last_alloc := get_alloc (); raise exn let profile6 e f a b c d g h = let dw = spent_alloc () in match !stack with [] -> assert false | p::_ -> (* We add spent alloc since last measure to current caller own/total alloc *) ajoute_ownalloc p dw; ajoute_totalloc p dw; e.owncount <- e.owncount + 1; if not (p==e) then stack := e::!stack; let totalloc0 = e.totalloc in let intcount0 = e.intcount in let t = get_time () in try last_alloc := get_alloc (); let r = f a b c d g h in let dw = spent_alloc () in let dt = get_time () - t in e.tottime <- e.tottime + dt; e.owntime <- e.owntime + dt; ajoute_ownalloc e dw; ajoute_totalloc e dw; p.owntime <- p.owntime - dt; ajoute_totalloc p (e.totalloc -. totalloc0); p.intcount <- p.intcount + e.intcount - intcount0 + 1; p.immcount <- p.immcount + 1; if not (p==e) then (match !stack with [] -> assert false | _::s -> stack := s); last_alloc := get_alloc (); r with exn -> let dw = spent_alloc () in let dt = get_time () - t in e.tottime <- e.tottime + dt; e.owntime <- e.owntime + dt; ajoute_ownalloc e dw; ajoute_totalloc e dw; p.owntime <- p.owntime - dt; ajoute_totalloc p (e.totalloc -. totalloc0); p.intcount <- p.intcount + e.intcount - intcount0 + 1; p.immcount <- p.immcount + 1; if not (p==e) then (match !stack with [] -> assert false | _::s -> stack := s); last_alloc := get_alloc (); raise exn let profile7 e f a b c d g h i = let dw = spent_alloc () in match !stack with [] -> assert false | p::_ -> (* We add spent alloc since last measure to current caller own/total alloc *) ajoute_ownalloc p dw; ajoute_totalloc p dw; e.owncount <- e.owncount + 1; if not (p==e) then stack := e::!stack; let totalloc0 = e.totalloc in let intcount0 = e.intcount in let t = get_time () in try last_alloc := get_alloc (); let r = f a b c d g h i in let dw = spent_alloc () in let dt = get_time () - t in e.tottime <- e.tottime + dt; e.owntime <- e.owntime + dt; ajoute_ownalloc e dw; ajoute_totalloc e dw; p.owntime <- p.owntime - dt; ajoute_totalloc p (e.totalloc -. totalloc0); p.intcount <- p.intcount + e.intcount - intcount0 + 1; p.immcount <- p.immcount + 1; if not (p==e) then (match !stack with [] -> assert false | _::s -> stack := s); last_alloc := get_alloc (); r with exn -> let dw = spent_alloc () in let dt = get_time () - t in e.tottime <- e.tottime + dt; e.owntime <- e.owntime + dt; ajoute_ownalloc e dw; ajoute_totalloc e dw; p.owntime <- p.owntime - dt; ajoute_totalloc p (e.totalloc -. totalloc0); p.intcount <- p.intcount + e.intcount - intcount0 + 1; p.immcount <- p.immcount + 1; if not (p==e) then (match !stack with [] -> assert false | _::s -> stack := s); last_alloc := get_alloc (); raise exn (* Some utilities to compute the logical and physical sizes and depth of ML objects *) let c = ref 0 let s = ref 0 let b = ref 0 let m = ref 0 let rec obj_stats d t = if Obj.is_int t then m := max d !m else if Obj.tag t >= Obj.no_scan_tag then if Obj.tag t = Obj.string_tag then (c := !c + Obj.size t; b := !b + 1; m := max d !m) else if Obj.tag t = Obj.double_tag then (s := !s + 2; b := !b + 1; m := max d !m) else if Obj.tag t = Obj.double_array_tag then (s := !s + 2 * Obj.size t; b := !b + 1; m := max d !m) else (b := !b + 1; m := max d !m) else let n = Obj.size t in s := !s + n; b := !b + 1; block_stats (d + 1) (n - 1) t and block_stats d i t = if i >= 0 then (obj_stats d (Obj.field t i); block_stats d (i-1) t) let obj_stats a = c := 0; s:= 0; b:= 0; m:= 0; obj_stats 0 (Obj.repr a); (!c, !s + !b, !m) module H = Hashtbl.Make( struct type t = Obj.t let equal = (==) let hash o = Hashtbl.hash (Obj.magic o : int) end) let tbl = H.create 13 let rec obj_shared_size s t = if Obj.is_int t then s else if H.mem tbl t then s else begin H.add tbl t (); let n = Obj.size t in if Obj.tag t >= Obj.no_scan_tag then if Obj.tag t = Obj.string_tag then (c := !c + n; s + 1) else if Obj.tag t = Obj.double_tag then s + 3 else if Obj.tag t = Obj.double_array_tag then s + 2 * n + 1 else s + 1 else block_shared_size (s + n + 1) (n - 1) t end and block_shared_size s i t = if i < 0 then s else block_shared_size (obj_shared_size s (Obj.field t i)) (i-1) t let obj_shared_size a = H.clear tbl; c := 0; let s = obj_shared_size 0 (Obj.repr a) in (!c, s) let print_logical_stats a = let (c, s, d) = obj_stats a in Printf.printf "Expanded size: %10d (str: %8d) Depth: %6d\n" (s+c) c d let print_stats a = let (c1, s, d) = obj_stats a in let (c2, o) = obj_shared_size a in Printf.printf "Size: %8d (str: %8d) (exp: %10d) Depth: %6d\n" (o + c2) c2 (s + c1) d (* let _ = Gc.set { (Gc.get()) with Gc.verbose = 13 } *)