summaryrefslogtreecommitdiff
path: root/tools/coqdoc/tokens.ml
blob: a93ae8557978ca2964778544353b29c8decb5c26 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
(************************************************************************)
(*  v      *   The Coq Proof Assistant  /  The Coq Development Team     *)
(* <O___,, *   INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2015     *)
(*   \VV/  **************************************************************)
(*    //   *      This file is distributed under the terms of the       *)
(*         *       GNU Lesser General Public License Version 2.1        *)
(************************************************************************)

(* Application of printing rules based on a dictionary specific to the
   target language *)

(*s Dictionaries: trees annotated with string options, each node being a map
    from chars to dictionaries (the subtrees). A trie, in other words.

    (code copied from parsing/lexer.ml4 for the use of coqdoc, Apr 2010)
*)

module CharMap = Map.Make (struct
  type t = char
  let compare (x : t) (y : t) = compare x y
end)

type ttree = {
  node : string option;
  branch : ttree CharMap.t }

let empty_ttree = { node = None; branch = CharMap.empty }

let ttree_add ttree str translated =
  let rec insert tt i =
    if i == String.length str then
      {node = Some translated; branch = tt.branch}
    else
      let c = str.[i] in
      let br =
        match try Some (CharMap.find c tt.branch) with Not_found -> None with
          | Some tt' ->
              CharMap.add c (insert tt' (i + 1)) (CharMap.remove c tt.branch)
          | None ->
              let tt' = {node = None; branch = CharMap.empty} in
              CharMap.add c (insert tt' (i + 1)) tt.branch
      in
      { node = tt.node; branch = br }
  in
  insert ttree 0

(* Removes a string from a dictionary: returns an equal dictionary
   if the word not present. *)
let ttree_remove ttree str =
  let rec remove tt i =
    if i == String.length str then
      {node = None; branch = tt.branch}
    else
      let c = str.[i] in
      let br =
        match try Some (CharMap.find c tt.branch) with Not_found -> None with
          | Some tt' ->
              CharMap.add c (remove tt' (i + 1)) (CharMap.remove c tt.branch)
          | None -> tt.branch
      in
      { node = tt.node; branch = br }
  in
  remove ttree 0

let ttree_descend ttree c = CharMap.find c ttree.branch

let ttree_find ttree str =
  let rec proc_rec tt i =
    if i == String.length str then tt
    else proc_rec (CharMap.find str.[i] tt.branch) (i+1)
  in
  proc_rec ttree 0

(*s Parameters of the translation automaton *)

type out_function = bool -> bool -> Index.index_entry option -> string -> unit

let token_tree = ref (ref empty_ttree)
let outfun = ref (fun _ _ _ _ -> failwith "outfun not initialized")

(*s Translation automaton *)

let buff = Buffer.create 4

let flush_buffer was_symbolchar tag tok =
  let hastr = String.length tok <> 0 in
  if hastr then !outfun false was_symbolchar tag tok;
  if Buffer.length buff <> 0 then
    !outfun true (if hastr then not was_symbolchar else was_symbolchar)
      tag (Buffer.contents buff);
  Buffer.clear buff

type sublexer_state =
  | Neutral
  | Buffering of bool * Index.index_entry option * string * ttree

let translation_state = ref Neutral

let buffer_char is_symbolchar ctag c =
  let rec aux = function
  | Neutral ->
      restart_buffering ()
  | Buffering (was_symbolchar,tag,translated,tt) ->
      if tag <> ctag then
	(* A strong tag comes from Coq; if different Coq tags *)
	(* hence, we don't try to see the chars as part of a single token *)
	let translated =
	  match tt.node with
	  | Some tok -> Buffer.clear buff; tok
	  | None -> translated in
	flush_buffer was_symbolchar tag translated;
	restart_buffering ()
      else
	begin
	(* If we change the category of characters (symbol vs ident) *)
	(* we accept this as a possible token cut point and remember the *)
	(* translated token up to that point *)
	let translated =
	  if is_symbolchar <> was_symbolchar then
	    match tt.node with
	    | Some tok -> Buffer.clear buff; tok
	    | None -> translated
	  else translated in
	(* We try to make a significant token from the current *)
	(* buffer and the new character *)
	try
	  let tt = ttree_descend tt c in
	  Buffer.add_char buff c;
	  Buffering (is_symbolchar,ctag,translated,tt)
	with Not_found ->
	(* No existing translation for the given set of chars *)
	if is_symbolchar <> was_symbolchar then
	  (* If we changed the category of character read, we accept it *)
	  (* as a possible cut point and restart looking for a translation *)
	  (flush_buffer was_symbolchar tag translated;
	   restart_buffering ())
	else
	  (* If we did not change the category of character read, we do *)
	  (* not want to cut arbitrarily in the middle of the sequence of *)
	  (*  symbol characters or identifier characters *)
	  (Buffer.add_char buff c;
	   Buffering (is_symbolchar,tag,translated,empty_ttree))
	end

  and restart_buffering () =
    let tt = try ttree_descend !(!token_tree) c with Not_found -> empty_ttree in
    Buffer.add_char buff c;
    Buffering (is_symbolchar,ctag,"",tt)

  in
  translation_state := aux !translation_state

let output_tagged_ident_string s =
  for i = 0 to String.length s - 1 do buffer_char false None s.[i] done

let output_tagged_symbol_char tag c =
  buffer_char true tag c

let flush_sublexer () =
  match !translation_state with
  | Neutral -> ()
  | Buffering (was_symbolchar,tag,translated,tt) ->
      let translated =
	match tt.node with
	| Some tok -> Buffer.clear buff; tok
	| None -> translated in
      flush_buffer was_symbolchar tag translated;
      translation_state := Neutral

(* Translation not using the automaton *)
let translate s =
  try (ttree_find !(!token_tree) s).node with Not_found -> None