aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Pierre-Marie Pédrot <pierre-marie.pedrot@inria.fr>2015-03-24 21:01:42 +0100
committerGravatar Pierre-Marie Pédrot <pierre-marie.pedrot@inria.fr>2015-06-25 15:54:40 +0200
commit679e49dbdc1448c65122eecd24783eb35376d492 (patch)
tree5055b0e9039d91b6c8cc42b9f7d121ec7c381542
parent9c7b70032972ed613bcca02cceedcd33c240f7a8 (diff)
Adding a more efficient representation of OCaml objects in votour.
-rw-r--r--Makefile.build2
-rw-r--r--checker/analyze.ml350
-rw-r--r--checker/analyze.mli35
-rw-r--r--checker/votour.ml89
4 files changed, 464 insertions, 12 deletions
diff --git a/Makefile.build b/Makefile.build
index 92c0d46f2..16c573086 100644
--- a/Makefile.build
+++ b/Makefile.build
@@ -654,7 +654,7 @@ $(FAKEIDE): lib/clib$(BESTLIB) lib/xml_lexer$(BESTOBJ) lib/xml_parser$(BESTOBJ)
# votour: a small vo explorer (based on the checker)
-bin/votour: lib/cObj$(BESTOBJ) checker/values$(BESTOBJ) checker/votour.ml
+bin/votour: lib/cObj$(BESTOBJ) checker/analyze$(BESTOBJ) checker/values$(BESTOBJ) checker/votour.ml
$(SHOW)'OCAMLBEST -o $@'
$(HIDE)$(call bestocaml, -I checker,)
diff --git a/checker/analyze.ml b/checker/analyze.ml
new file mode 100644
index 000000000..c48b83011
--- /dev/null
+++ b/checker/analyze.ml
@@ -0,0 +1,350 @@
+(** Headers *)
+
+let prefix_small_block = 0x80
+let prefix_small_int = 0x40
+let prefix_small_string = 0x20
+
+let code_int8 = 0x00
+let code_int16 = 0x01
+let code_int32 = 0x02
+let code_int64 = 0x03
+let code_shared8 = 0x04
+let code_shared16 = 0x05
+let code_shared32 = 0x06
+let code_double_array32_little = 0x07
+let code_block32 = 0x08
+let code_string8 = 0x09
+let code_string32 = 0x0A
+let code_double_big = 0x0B
+let code_double_little = 0x0C
+let code_double_array8_big = 0x0D
+let code_double_array8_little = 0x0E
+let code_double_array32_big = 0x0F
+let code_codepointer = 0x10
+let code_infixpointer = 0x11
+let code_custom = 0x12
+let code_block64 = 0x13
+
+type code_descr =
+| CODE_INT8
+| CODE_INT16
+| CODE_INT32
+| CODE_INT64
+| CODE_SHARED8
+| CODE_SHARED16
+| CODE_SHARED32
+| CODE_DOUBLE_ARRAY32_LITTLE
+| CODE_BLOCK32
+| CODE_STRING8
+| CODE_STRING32
+| CODE_DOUBLE_BIG
+| CODE_DOUBLE_LITTLE
+| CODE_DOUBLE_ARRAY8_BIG
+| CODE_DOUBLE_ARRAY8_LITTLE
+| CODE_DOUBLE_ARRAY32_BIG
+| CODE_CODEPOINTER
+| CODE_INFIXPOINTER
+| CODE_CUSTOM
+| CODE_BLOCK64
+
+let code_max = 0x13
+
+let magic_number = "\132\149\166\190"
+
+(** Memory reification *)
+
+type repr =
+| RInt of int
+| RBlock of (int * int) (* tag × len *)
+| RString of string
+| RPointer of int
+| RCode of int
+
+type data =
+| Int of int (* value *)
+| Ptr of int (* pointer *)
+| Atm of int (* tag *)
+| Fun of int (* address *)
+
+type obj =
+| Struct of int * data array (* tag × data *)
+| String of string
+
+module type Input =
+sig
+ type t
+ val input_byte : t -> int
+ val input_binary_int : t -> int
+end
+
+module type S =
+sig
+ type input
+ val parse : input -> (data * obj array)
+end
+
+module Make(M : Input) =
+struct
+
+open M
+
+type input = M.t
+
+let current_offset = ref 0
+
+let input_byte chan =
+ let () = incr current_offset in
+ input_byte chan
+
+let input_binary_int chan =
+ let () = current_offset := !current_offset + 4 in
+ input_binary_int chan
+
+let input_char chan = Char.chr (input_byte chan)
+
+let parse_header chan =
+ let () = current_offset := 0 in
+ let magic = String.create 4 in
+ let () = for i = 0 to 3 do magic.[i] <- input_char chan done in
+ let length = input_binary_int chan in
+ let objects = input_binary_int chan in
+ let size32 = input_binary_int chan in
+ let size64 = input_binary_int chan in
+ (magic, length, size32, size64, objects)
+
+let input_int8s chan =
+ let i = input_byte chan in
+ if i land 0x80 = 0
+ then i
+ else i lor ((-1) lsl 8)
+
+let input_int8u = input_byte
+
+let input_int16s chan =
+ let i = input_byte chan in
+ let j = input_byte chan in
+ let ans = (i lsl 8) lor j in
+ if i land 0x80 = 0
+ then ans
+ else ans lor ((-1) lsl 16)
+
+let input_int16u chan =
+ let i = input_byte chan in
+ let j = input_byte chan in
+ (i lsl 8) lor j
+
+let input_int32s chan =
+ let i = input_byte chan in
+ let j = input_byte chan in
+ let k = input_byte chan in
+ let l = input_byte chan in
+ let ans = (i lsl 24) lor (j lsl 16) lor (k lsl 8) lor l in
+ if i land 0x80 = 0
+ then ans
+ else ans lor ((-1) lsl 31)
+
+let input_int32u chan =
+ let i = input_byte chan in
+ let j = input_byte chan in
+ let k = input_byte chan in
+ let l = input_byte chan in
+ (i lsl 24) lor (j lsl 16) lor (k lsl 8) lor l
+
+let input_int64s chan =
+ let i = input_byte chan in
+ let j = input_byte chan in
+ let k = input_byte chan in
+ let l = input_byte chan in
+ let m = input_byte chan in
+ let n = input_byte chan in
+ let o = input_byte chan in
+ let p = input_byte chan in
+ let ans =
+ (i lsl 56) lor (j lsl 48) lor (k lsl 40) lor (l lsl 32) lor
+ (m lsl 24) lor (n lsl 16) lor (o lsl 8) lor p
+ in
+ if i land 0x80 = 0
+ then ans
+ else ans lor ((-1) lsl 63)
+
+let input_int64u chan =
+ let i = input_byte chan in
+ let j = input_byte chan in
+ let k = input_byte chan in
+ let l = input_byte chan in
+ let m = input_byte chan in
+ let n = input_byte chan in
+ let o = input_byte chan in
+ let p = input_byte chan in
+ (i lsl 56) lor (j lsl 48) lor (k lsl 40) lor (l lsl 32) lor
+ (m lsl 24) lor (n lsl 16) lor (o lsl 8) lor p
+
+let input_header32 chan =
+ let i = input_byte chan in
+ let j = input_byte chan in
+ let k = input_byte chan in
+ let l = input_byte chan in
+ let tag = l in
+ let len = (i lsl 14) lor (j lsl 6) lor (k lsr 2) in
+ (tag, len)
+
+let input_header64 chan =
+ let i = input_byte chan in
+ let j = input_byte chan in
+ let k = input_byte chan in
+ let l = input_byte chan in
+ let m = input_byte chan in
+ let n = input_byte chan in
+ let o = input_byte chan in
+ let p = input_byte chan in
+ let tag = p in
+ let len =
+ (i lsl 46) lor (j lsl 38) lor (k lsl 30) lor (l lsl 22) lor
+ (m lsl 14) lor (n lsl 6) lor (o lsr 2)
+ in
+ (tag, len)
+
+let input_string len chan =
+ let ans = String.create len in
+ for i = 0 to pred len do
+ ans.[i] <- input_char chan;
+ done;
+ ans
+
+let parse_object chan =
+ let data = input_byte chan in
+ if prefix_small_block <= data then
+ let tag = data land 0x0F in
+ let len = (data lsr 4) land 0x07 in
+ RBlock (tag, len)
+ else if prefix_small_int <= data then
+ RInt (data land 0x3F)
+ else if prefix_small_string <= data then
+ let len = data land 0x1F in
+ RString (input_string len chan)
+ else if data > code_max then
+ assert false
+ else match (Obj.magic data) with
+ | CODE_INT8 ->
+ RInt (input_int8s chan)
+ | CODE_INT16 ->
+ RInt (input_int16s chan)
+ | CODE_INT32 ->
+ RInt (input_int32s chan)
+ | CODE_INT64 ->
+ RInt (input_int64s chan)
+ | CODE_SHARED8 ->
+ RPointer (input_int8u chan)
+ | CODE_SHARED16 ->
+ RPointer (input_int16u chan)
+ | CODE_SHARED32 ->
+ RPointer (input_int32u chan)
+ | CODE_BLOCK32 ->
+ RBlock (input_header32 chan)
+ | CODE_BLOCK64 ->
+ RBlock (input_header64 chan)
+ | CODE_STRING8 ->
+ let len = input_int8u chan in
+ RString (input_string len chan)
+ | CODE_STRING32 ->
+ let len = input_int32u chan in
+ RString (input_string len chan)
+ | CODE_CODEPOINTER ->
+ let addr = input_int32u chan in
+ for i = 0 to 15 do ignore (input_byte chan); done;
+ RCode addr
+ | CODE_DOUBLE_ARRAY32_LITTLE
+ | CODE_DOUBLE_BIG
+ | CODE_DOUBLE_LITTLE
+ | CODE_DOUBLE_ARRAY8_BIG
+ | CODE_DOUBLE_ARRAY8_LITTLE
+ | CODE_DOUBLE_ARRAY32_BIG
+ | CODE_INFIXPOINTER
+ | CODE_CUSTOM ->
+ Printf.eprintf "Unhandled code %04x\n%!" data; assert false
+
+let parse chan =
+ let (magic, len, _, _, size) = parse_header chan in
+ let () = assert (magic = magic_number) in
+ let memory = Array.make size (Struct ((-1), [||])) in
+ let current_object = ref 0 in
+ let fill_obj = function
+ | RPointer n ->
+ let data = Ptr (!current_object - n) in
+ data, None
+ | RInt n ->
+ let data = Int n in
+ data, None
+ | RString s ->
+ let data = Ptr !current_object in
+ let () = memory.(!current_object) <- String s in
+ let () = incr current_object in
+ data, None
+ | RBlock (tag, 0) ->
+ (* Atoms are never shared *)
+ let data = Atm tag in
+ data, None
+ | RBlock (tag, len) ->
+ let data = Ptr !current_object in
+ let nblock = Array.make len (Atm (-1)) in
+ let () = memory.(!current_object) <- Struct (tag, nblock) in
+ let () = incr current_object in
+ data, Some nblock
+ | RCode addr ->
+ let data = Fun addr in
+ data, None
+ in
+
+ let rec fill block off accu =
+ if Array.length block = off then
+ match accu with
+ | [] -> ()
+ | (block, off) :: accu -> fill block off accu
+ else
+ let data, nobj = fill_obj (parse_object chan) in
+ let () = block.(off) <- data in
+ let block, off, accu = match nobj with
+ | None -> block, succ off, accu
+ | Some nblock -> nblock, 0, ((block, succ off) :: accu)
+ in
+ fill block off accu
+ in
+ let ans = [|Atm (-1)|] in
+ let () = fill ans 0 [] in
+ (ans.(0), memory)
+
+end
+
+module IChannel =
+struct
+ type t = in_channel
+ let input_byte = Pervasives.input_byte
+ let input_binary_int = Pervasives.input_binary_int
+end
+
+module IString =
+struct
+ type t = (string * int ref)
+
+ let input_byte (s, off) =
+ let ans = Char.code (s.[!off]) in
+ let () = incr off in
+ ans
+
+ let input_binary_int chan =
+ let i = input_byte chan in
+ let j = input_byte chan in
+ let k = input_byte chan in
+ let l = input_byte chan in
+ let ans = (i lsl 24) lor (j lsl 16) lor (k lsl 8) lor l in
+ if i land 0x80 = 0
+ then ans
+ else ans lor ((-1) lsl 31)
+
+end
+
+module PChannel = Make(IChannel)
+module PString = Make(IString)
+
+let parse_channel = PChannel.parse
+let parse_string s = PString.parse (s, ref 0)
diff --git a/checker/analyze.mli b/checker/analyze.mli
new file mode 100644
index 000000000..42efcf01d
--- /dev/null
+++ b/checker/analyze.mli
@@ -0,0 +1,35 @@
+type data =
+| Int of int
+| Ptr of int
+| Atm of int (* tag *)
+| Fun of int (* address *)
+
+type obj =
+| Struct of int * data array (* tag × data *)
+| String of string
+
+val parse_channel : in_channel -> (data * obj array)
+val parse_string : string -> (data * obj array)
+
+(** {6 Functorized version} *)
+
+module type Input =
+sig
+ type t
+ val input_byte : t -> int
+ (** Input a single byte *)
+ val input_binary_int : t -> int
+ (** Input a big-endian 31-bits signed integer *)
+end
+(** Type of inputs *)
+
+module type S =
+sig
+ type input
+ val parse : input -> (data * obj array)
+ (** Return the entry point and the reification of the memory out of a
+ marshalled structure. *)
+end
+
+module Make (M : Input) : S with type input = M.t
+(** Functorized version of the previous code. *)
diff --git a/checker/votour.ml b/checker/votour.ml
index bb8e06702..4aecb28f2 100644
--- a/checker/votour.ml
+++ b/checker/votour.ml
@@ -21,32 +21,91 @@ sig
type obj
val input : in_channel -> obj
val repr : obj -> obj repr
- val size : int list -> int
+ val size : obj -> int
end
-module Repr : S =
+module ReprObj : S =
struct
- type obj = Obj.t
+ type obj = Obj.t * int list
let input chan =
let obj = input_value chan in
let () = CObj.register_shared_size obj in
- obj
+ (obj, [])
- let repr obj =
+ let repr (obj, pos) =
if Obj.is_block obj then
let tag = Obj.tag obj in
if tag = Obj.string_tag then STRING (Obj.magic obj)
else if tag < Obj.no_scan_tag then
- let data = Obj.dup obj in
- let () = Obj.set_tag data 0 in
+ let init i = (Obj.field obj i, i :: pos) in
+ let data = Array.init (Obj.size obj) init in
BLOCK (tag, Obj.magic data)
else OTHER
else INT (Obj.magic obj)
- let size p = CObj.shared_size_of_pos p
+ let size (_, p) = CObj.shared_size_of_pos p
end
+module ReprMem : S =
+struct
+ open Analyze
+
+ type obj = data
+
+ let memory = ref [||]
+ let sizes = ref [||]
+ (** size, in words *)
+
+ let ws = Sys.word_size / 8
+
+ let rec init_size seen = function
+ | Int _ | Atm _ | Fun _ -> 0
+ | Ptr p ->
+ if seen.(p) then 0
+ else
+ let () = seen.(p) <- true in
+ match (!memory).(p) with
+ | Struct (tag, os) ->
+ let fold accu o = accu + 1 + init_size seen o in
+ let size = Array.fold_left fold 1 os in
+ let () = (!sizes).(p) <- size in
+ size
+ | String s ->
+ let size = 2 + (String.length s / ws) in
+ let () = (!sizes).(p) <- size in
+ size
+
+ let size = function
+ | Int _ | Atm _ | Fun _ -> 0
+ | Ptr p -> (!sizes).(p)
+
+ let repr = function
+ | Int i -> INT i
+ | Atm t -> BLOCK (t, [||])
+ | Fun _ -> OTHER
+ | Ptr p ->
+ match (!memory).(p) with
+ | Struct (tag, os) -> BLOCK (tag, os)
+ | String s -> STRING s
+
+ let input ch =
+ let obj, mem = parse_channel ch in
+ let () = memory := mem in
+ let () = sizes := Array.make (Array.length mem) (-1) in
+ let seen = Array.make (Array.length mem) false in
+ let _ = init_size seen obj in
+ obj
+
+end
+
+module Visit (Repr : S) :
+sig
+ val init : unit -> unit
+ val visit : Values.value -> Repr.obj -> int list -> unit
+end =
+struct
+
(** Name of a value *)
let rec get_name ?(extra=false) = function
@@ -92,7 +151,7 @@ let rec get_details v o = match v, Repr.repr o with
let node_info (v,o,p) =
get_name ~extra:true v ^ get_details v o ^
- " (size "^ string_of_int (Repr.size p)^"w)"
+ " (size "^ string_of_int (Repr.size o)^"w)"
(** Children of a block : type, object, position.
For lists, we collect all elements of the list at once *)
@@ -201,6 +260,8 @@ let rec visit v o pos =
| Failure "forbidden" -> let info = pop () in visit info.typ info.obj info.pos
| Failure _ | Invalid_argument _ -> visit v o pos
+end
+
(** Loading the vo *)
type header = {
@@ -256,6 +317,12 @@ let visit_vo f =
make_seg "STM tasks" (Opt Values.v_stm_seg);
make_seg "opaque proofs" Values.v_opaques;
|] in
+ let repr =
+ if Sys.word_size = 64 then (module ReprMem : S) else (module ReprObj : S)
+ (** On 32-bit machines, representation may exceed the max size of arrays *)
+ in
+ let module Repr = (val repr : S) in
+ let module Visit = Visit(Repr) in
while true do
let ch = open_in_bin f in
let magic = input_binary_int ch in
@@ -279,8 +346,8 @@ let visit_vo f =
let seg = int_of_string l in
seek_in ch segments.(seg).pos;
let o = Repr.input ch in
- let () = init () in
- visit segments.(seg).typ o []
+ let () = Visit.init () in
+ Visit.visit segments.(seg).typ o []
done
let main =