diff options
author | 2015-03-24 21:01:42 +0100 | |
---|---|---|
committer | 2015-06-25 15:54:40 +0200 | |
commit | 679e49dbdc1448c65122eecd24783eb35376d492 (patch) | |
tree | 5055b0e9039d91b6c8cc42b9f7d121ec7c381542 | |
parent | 9c7b70032972ed613bcca02cceedcd33c240f7a8 (diff) |
Adding a more efficient representation of OCaml objects in votour.
-rw-r--r-- | Makefile.build | 2 | ||||
-rw-r--r-- | checker/analyze.ml | 350 | ||||
-rw-r--r-- | checker/analyze.mli | 35 | ||||
-rw-r--r-- | checker/votour.ml | 89 |
4 files changed, 464 insertions, 12 deletions
diff --git a/Makefile.build b/Makefile.build index 92c0d46f2..16c573086 100644 --- a/Makefile.build +++ b/Makefile.build @@ -654,7 +654,7 @@ $(FAKEIDE): lib/clib$(BESTLIB) lib/xml_lexer$(BESTOBJ) lib/xml_parser$(BESTOBJ) # votour: a small vo explorer (based on the checker) -bin/votour: lib/cObj$(BESTOBJ) checker/values$(BESTOBJ) checker/votour.ml +bin/votour: lib/cObj$(BESTOBJ) checker/analyze$(BESTOBJ) checker/values$(BESTOBJ) checker/votour.ml $(SHOW)'OCAMLBEST -o $@' $(HIDE)$(call bestocaml, -I checker,) diff --git a/checker/analyze.ml b/checker/analyze.ml new file mode 100644 index 000000000..c48b83011 --- /dev/null +++ b/checker/analyze.ml @@ -0,0 +1,350 @@ +(** Headers *) + +let prefix_small_block = 0x80 +let prefix_small_int = 0x40 +let prefix_small_string = 0x20 + +let code_int8 = 0x00 +let code_int16 = 0x01 +let code_int32 = 0x02 +let code_int64 = 0x03 +let code_shared8 = 0x04 +let code_shared16 = 0x05 +let code_shared32 = 0x06 +let code_double_array32_little = 0x07 +let code_block32 = 0x08 +let code_string8 = 0x09 +let code_string32 = 0x0A +let code_double_big = 0x0B +let code_double_little = 0x0C +let code_double_array8_big = 0x0D +let code_double_array8_little = 0x0E +let code_double_array32_big = 0x0F +let code_codepointer = 0x10 +let code_infixpointer = 0x11 +let code_custom = 0x12 +let code_block64 = 0x13 + +type code_descr = +| CODE_INT8 +| CODE_INT16 +| CODE_INT32 +| CODE_INT64 +| CODE_SHARED8 +| CODE_SHARED16 +| CODE_SHARED32 +| CODE_DOUBLE_ARRAY32_LITTLE +| CODE_BLOCK32 +| CODE_STRING8 +| CODE_STRING32 +| CODE_DOUBLE_BIG +| CODE_DOUBLE_LITTLE +| CODE_DOUBLE_ARRAY8_BIG +| CODE_DOUBLE_ARRAY8_LITTLE +| CODE_DOUBLE_ARRAY32_BIG +| CODE_CODEPOINTER +| CODE_INFIXPOINTER +| CODE_CUSTOM +| CODE_BLOCK64 + +let code_max = 0x13 + +let magic_number = "\132\149\166\190" + +(** Memory reification *) + +type repr = +| RInt of int +| RBlock of (int * int) (* tag × len *) +| RString of string +| RPointer of int +| RCode of int + +type data = +| Int of int (* value *) +| Ptr of int (* pointer *) +| Atm of int (* tag *) +| Fun of int (* address *) + +type obj = +| Struct of int * data array (* tag × data *) +| String of string + +module type Input = +sig + type t + val input_byte : t -> int + val input_binary_int : t -> int +end + +module type S = +sig + type input + val parse : input -> (data * obj array) +end + +module Make(M : Input) = +struct + +open M + +type input = M.t + +let current_offset = ref 0 + +let input_byte chan = + let () = incr current_offset in + input_byte chan + +let input_binary_int chan = + let () = current_offset := !current_offset + 4 in + input_binary_int chan + +let input_char chan = Char.chr (input_byte chan) + +let parse_header chan = + let () = current_offset := 0 in + let magic = String.create 4 in + let () = for i = 0 to 3 do magic.[i] <- input_char chan done in + let length = input_binary_int chan in + let objects = input_binary_int chan in + let size32 = input_binary_int chan in + let size64 = input_binary_int chan in + (magic, length, size32, size64, objects) + +let input_int8s chan = + let i = input_byte chan in + if i land 0x80 = 0 + then i + else i lor ((-1) lsl 8) + +let input_int8u = input_byte + +let input_int16s chan = + let i = input_byte chan in + let j = input_byte chan in + let ans = (i lsl 8) lor j in + if i land 0x80 = 0 + then ans + else ans lor ((-1) lsl 16) + +let input_int16u chan = + let i = input_byte chan in + let j = input_byte chan in + (i lsl 8) lor j + +let input_int32s chan = + let i = input_byte chan in + let j = input_byte chan in + let k = input_byte chan in + let l = input_byte chan in + let ans = (i lsl 24) lor (j lsl 16) lor (k lsl 8) lor l in + if i land 0x80 = 0 + then ans + else ans lor ((-1) lsl 31) + +let input_int32u chan = + let i = input_byte chan in + let j = input_byte chan in + let k = input_byte chan in + let l = input_byte chan in + (i lsl 24) lor (j lsl 16) lor (k lsl 8) lor l + +let input_int64s chan = + let i = input_byte chan in + let j = input_byte chan in + let k = input_byte chan in + let l = input_byte chan in + let m = input_byte chan in + let n = input_byte chan in + let o = input_byte chan in + let p = input_byte chan in + let ans = + (i lsl 56) lor (j lsl 48) lor (k lsl 40) lor (l lsl 32) lor + (m lsl 24) lor (n lsl 16) lor (o lsl 8) lor p + in + if i land 0x80 = 0 + then ans + else ans lor ((-1) lsl 63) + +let input_int64u chan = + let i = input_byte chan in + let j = input_byte chan in + let k = input_byte chan in + let l = input_byte chan in + let m = input_byte chan in + let n = input_byte chan in + let o = input_byte chan in + let p = input_byte chan in + (i lsl 56) lor (j lsl 48) lor (k lsl 40) lor (l lsl 32) lor + (m lsl 24) lor (n lsl 16) lor (o lsl 8) lor p + +let input_header32 chan = + let i = input_byte chan in + let j = input_byte chan in + let k = input_byte chan in + let l = input_byte chan in + let tag = l in + let len = (i lsl 14) lor (j lsl 6) lor (k lsr 2) in + (tag, len) + +let input_header64 chan = + let i = input_byte chan in + let j = input_byte chan in + let k = input_byte chan in + let l = input_byte chan in + let m = input_byte chan in + let n = input_byte chan in + let o = input_byte chan in + let p = input_byte chan in + let tag = p in + let len = + (i lsl 46) lor (j lsl 38) lor (k lsl 30) lor (l lsl 22) lor + (m lsl 14) lor (n lsl 6) lor (o lsr 2) + in + (tag, len) + +let input_string len chan = + let ans = String.create len in + for i = 0 to pred len do + ans.[i] <- input_char chan; + done; + ans + +let parse_object chan = + let data = input_byte chan in + if prefix_small_block <= data then + let tag = data land 0x0F in + let len = (data lsr 4) land 0x07 in + RBlock (tag, len) + else if prefix_small_int <= data then + RInt (data land 0x3F) + else if prefix_small_string <= data then + let len = data land 0x1F in + RString (input_string len chan) + else if data > code_max then + assert false + else match (Obj.magic data) with + | CODE_INT8 -> + RInt (input_int8s chan) + | CODE_INT16 -> + RInt (input_int16s chan) + | CODE_INT32 -> + RInt (input_int32s chan) + | CODE_INT64 -> + RInt (input_int64s chan) + | CODE_SHARED8 -> + RPointer (input_int8u chan) + | CODE_SHARED16 -> + RPointer (input_int16u chan) + | CODE_SHARED32 -> + RPointer (input_int32u chan) + | CODE_BLOCK32 -> + RBlock (input_header32 chan) + | CODE_BLOCK64 -> + RBlock (input_header64 chan) + | CODE_STRING8 -> + let len = input_int8u chan in + RString (input_string len chan) + | CODE_STRING32 -> + let len = input_int32u chan in + RString (input_string len chan) + | CODE_CODEPOINTER -> + let addr = input_int32u chan in + for i = 0 to 15 do ignore (input_byte chan); done; + RCode addr + | CODE_DOUBLE_ARRAY32_LITTLE + | CODE_DOUBLE_BIG + | CODE_DOUBLE_LITTLE + | CODE_DOUBLE_ARRAY8_BIG + | CODE_DOUBLE_ARRAY8_LITTLE + | CODE_DOUBLE_ARRAY32_BIG + | CODE_INFIXPOINTER + | CODE_CUSTOM -> + Printf.eprintf "Unhandled code %04x\n%!" data; assert false + +let parse chan = + let (magic, len, _, _, size) = parse_header chan in + let () = assert (magic = magic_number) in + let memory = Array.make size (Struct ((-1), [||])) in + let current_object = ref 0 in + let fill_obj = function + | RPointer n -> + let data = Ptr (!current_object - n) in + data, None + | RInt n -> + let data = Int n in + data, None + | RString s -> + let data = Ptr !current_object in + let () = memory.(!current_object) <- String s in + let () = incr current_object in + data, None + | RBlock (tag, 0) -> + (* Atoms are never shared *) + let data = Atm tag in + data, None + | RBlock (tag, len) -> + let data = Ptr !current_object in + let nblock = Array.make len (Atm (-1)) in + let () = memory.(!current_object) <- Struct (tag, nblock) in + let () = incr current_object in + data, Some nblock + | RCode addr -> + let data = Fun addr in + data, None + in + + let rec fill block off accu = + if Array.length block = off then + match accu with + | [] -> () + | (block, off) :: accu -> fill block off accu + else + let data, nobj = fill_obj (parse_object chan) in + let () = block.(off) <- data in + let block, off, accu = match nobj with + | None -> block, succ off, accu + | Some nblock -> nblock, 0, ((block, succ off) :: accu) + in + fill block off accu + in + let ans = [|Atm (-1)|] in + let () = fill ans 0 [] in + (ans.(0), memory) + +end + +module IChannel = +struct + type t = in_channel + let input_byte = Pervasives.input_byte + let input_binary_int = Pervasives.input_binary_int +end + +module IString = +struct + type t = (string * int ref) + + let input_byte (s, off) = + let ans = Char.code (s.[!off]) in + let () = incr off in + ans + + let input_binary_int chan = + let i = input_byte chan in + let j = input_byte chan in + let k = input_byte chan in + let l = input_byte chan in + let ans = (i lsl 24) lor (j lsl 16) lor (k lsl 8) lor l in + if i land 0x80 = 0 + then ans + else ans lor ((-1) lsl 31) + +end + +module PChannel = Make(IChannel) +module PString = Make(IString) + +let parse_channel = PChannel.parse +let parse_string s = PString.parse (s, ref 0) diff --git a/checker/analyze.mli b/checker/analyze.mli new file mode 100644 index 000000000..42efcf01d --- /dev/null +++ b/checker/analyze.mli @@ -0,0 +1,35 @@ +type data = +| Int of int +| Ptr of int +| Atm of int (* tag *) +| Fun of int (* address *) + +type obj = +| Struct of int * data array (* tag × data *) +| String of string + +val parse_channel : in_channel -> (data * obj array) +val parse_string : string -> (data * obj array) + +(** {6 Functorized version} *) + +module type Input = +sig + type t + val input_byte : t -> int + (** Input a single byte *) + val input_binary_int : t -> int + (** Input a big-endian 31-bits signed integer *) +end +(** Type of inputs *) + +module type S = +sig + type input + val parse : input -> (data * obj array) + (** Return the entry point and the reification of the memory out of a + marshalled structure. *) +end + +module Make (M : Input) : S with type input = M.t +(** Functorized version of the previous code. *) diff --git a/checker/votour.ml b/checker/votour.ml index bb8e06702..4aecb28f2 100644 --- a/checker/votour.ml +++ b/checker/votour.ml @@ -21,32 +21,91 @@ sig type obj val input : in_channel -> obj val repr : obj -> obj repr - val size : int list -> int + val size : obj -> int end -module Repr : S = +module ReprObj : S = struct - type obj = Obj.t + type obj = Obj.t * int list let input chan = let obj = input_value chan in let () = CObj.register_shared_size obj in - obj + (obj, []) - let repr obj = + let repr (obj, pos) = if Obj.is_block obj then let tag = Obj.tag obj in if tag = Obj.string_tag then STRING (Obj.magic obj) else if tag < Obj.no_scan_tag then - let data = Obj.dup obj in - let () = Obj.set_tag data 0 in + let init i = (Obj.field obj i, i :: pos) in + let data = Array.init (Obj.size obj) init in BLOCK (tag, Obj.magic data) else OTHER else INT (Obj.magic obj) - let size p = CObj.shared_size_of_pos p + let size (_, p) = CObj.shared_size_of_pos p end +module ReprMem : S = +struct + open Analyze + + type obj = data + + let memory = ref [||] + let sizes = ref [||] + (** size, in words *) + + let ws = Sys.word_size / 8 + + let rec init_size seen = function + | Int _ | Atm _ | Fun _ -> 0 + | Ptr p -> + if seen.(p) then 0 + else + let () = seen.(p) <- true in + match (!memory).(p) with + | Struct (tag, os) -> + let fold accu o = accu + 1 + init_size seen o in + let size = Array.fold_left fold 1 os in + let () = (!sizes).(p) <- size in + size + | String s -> + let size = 2 + (String.length s / ws) in + let () = (!sizes).(p) <- size in + size + + let size = function + | Int _ | Atm _ | Fun _ -> 0 + | Ptr p -> (!sizes).(p) + + let repr = function + | Int i -> INT i + | Atm t -> BLOCK (t, [||]) + | Fun _ -> OTHER + | Ptr p -> + match (!memory).(p) with + | Struct (tag, os) -> BLOCK (tag, os) + | String s -> STRING s + + let input ch = + let obj, mem = parse_channel ch in + let () = memory := mem in + let () = sizes := Array.make (Array.length mem) (-1) in + let seen = Array.make (Array.length mem) false in + let _ = init_size seen obj in + obj + +end + +module Visit (Repr : S) : +sig + val init : unit -> unit + val visit : Values.value -> Repr.obj -> int list -> unit +end = +struct + (** Name of a value *) let rec get_name ?(extra=false) = function @@ -92,7 +151,7 @@ let rec get_details v o = match v, Repr.repr o with let node_info (v,o,p) = get_name ~extra:true v ^ get_details v o ^ - " (size "^ string_of_int (Repr.size p)^"w)" + " (size "^ string_of_int (Repr.size o)^"w)" (** Children of a block : type, object, position. For lists, we collect all elements of the list at once *) @@ -201,6 +260,8 @@ let rec visit v o pos = | Failure "forbidden" -> let info = pop () in visit info.typ info.obj info.pos | Failure _ | Invalid_argument _ -> visit v o pos +end + (** Loading the vo *) type header = { @@ -256,6 +317,12 @@ let visit_vo f = make_seg "STM tasks" (Opt Values.v_stm_seg); make_seg "opaque proofs" Values.v_opaques; |] in + let repr = + if Sys.word_size = 64 then (module ReprMem : S) else (module ReprObj : S) + (** On 32-bit machines, representation may exceed the max size of arrays *) + in + let module Repr = (val repr : S) in + let module Visit = Visit(Repr) in while true do let ch = open_in_bin f in let magic = input_binary_int ch in @@ -279,8 +346,8 @@ let visit_vo f = let seg = int_of_string l in seek_in ch segments.(seg).pos; let o = Repr.input ch in - let () = init () in - visit segments.(seg).typ o [] + let () = Visit.init () in + Visit.visit segments.(seg).typ o [] done let main = |