summaryrefslogtreecommitdiff
path: root/tools/coqdoc/tokens.ml
diff options
context:
space:
mode:
Diffstat (limited to 'tools/coqdoc/tokens.ml')
-rw-r--r--tools/coqdoc/tokens.ml171
1 files changed, 171 insertions, 0 deletions
diff --git a/tools/coqdoc/tokens.ml b/tools/coqdoc/tokens.ml
new file mode 100644
index 00000000..c2a47308
--- /dev/null
+++ b/tools/coqdoc/tokens.ml
@@ -0,0 +1,171 @@
+(************************************************************************)
+(* v * The Coq Proof Assistant / The Coq Development Team *)
+(* <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud *)
+(* \VV/ **************************************************************)
+(* // * This file is distributed under the terms of the *)
+(* * GNU Lesser General Public License Version 2.1 *)
+(************************************************************************)
+
+(* Application of printing rules based on a dictionary specific to the
+ target language *)
+
+open Cdglobals
+
+(*s Dictionaries: trees annotated with string options, each node being a map
+ from chars to dictionaries (the subtrees). A trie, in other words.
+
+ (code copied from parsing/lexer.ml4 for the use of coqdoc, Apr 2010)
+*)
+
+module CharMap = Map.Make (struct type t = char let compare = compare end)
+
+type ttree = {
+ node : string option;
+ branch : ttree CharMap.t }
+
+let empty_ttree = { node = None; branch = CharMap.empty }
+
+let ttree_add ttree str translated =
+ let rec insert tt i =
+ if i == String.length str then
+ {node = Some translated; branch = tt.branch}
+ else
+ let c = str.[i] in
+ let br =
+ match try Some (CharMap.find c tt.branch) with Not_found -> None with
+ | Some tt' ->
+ CharMap.add c (insert tt' (i + 1)) (CharMap.remove c tt.branch)
+ | None ->
+ let tt' = {node = None; branch = CharMap.empty} in
+ CharMap.add c (insert tt' (i + 1)) tt.branch
+ in
+ { node = tt.node; branch = br }
+ in
+ insert ttree 0
+
+(* Removes a string from a dictionary: returns an equal dictionary
+ if the word not present. *)
+let ttree_remove ttree str =
+ let rec remove tt i =
+ if i == String.length str then
+ {node = None; branch = tt.branch}
+ else
+ let c = str.[i] in
+ let br =
+ match try Some (CharMap.find c tt.branch) with Not_found -> None with
+ | Some tt' ->
+ CharMap.add c (remove tt' (i + 1)) (CharMap.remove c tt.branch)
+ | None -> tt.branch
+ in
+ { node = tt.node; branch = br }
+ in
+ remove ttree 0
+
+let ttree_descend ttree c = CharMap.find c ttree.branch
+
+let ttree_find ttree str =
+ let rec proc_rec tt i =
+ if i == String.length str then tt
+ else proc_rec (CharMap.find str.[i] tt.branch) (i+1)
+ in
+ proc_rec ttree 0
+
+(*s Parameters of the translation automaton *)
+
+type out_function = bool -> bool -> Index.index_entry option -> string -> unit
+
+let token_tree = ref (ref empty_ttree)
+let outfun = ref (fun _ _ _ _ -> failwith "outfun not initialized")
+
+(*s Translation automaton *)
+
+let buff = Buffer.create 4
+
+let flush_buffer was_symbolchar tag tok =
+ let hastr = String.length tok <> 0 in
+ if hastr then !outfun false was_symbolchar tag tok;
+ if Buffer.length buff <> 0 then
+ !outfun true (if hastr then not was_symbolchar else was_symbolchar)
+ tag (Buffer.contents buff);
+ Buffer.clear buff
+
+type sublexer_state =
+ | Neutral
+ | Buffering of bool * Index.index_entry option * string * ttree
+
+let translation_state = ref Neutral
+
+let buffer_char is_symbolchar ctag c =
+ let rec aux = function
+ | Neutral ->
+ restart_buffering ()
+ | Buffering (was_symbolchar,tag,translated,tt) ->
+ if tag <> ctag then
+ (* A strong tag comes from Coq; if different Coq tags *)
+ (* hence, we don't try to see the chars as part of a single token *)
+ let translated =
+ match tt.node with
+ | Some tok -> Buffer.clear buff; tok
+ | None -> translated in
+ flush_buffer was_symbolchar tag translated;
+ restart_buffering ()
+ else
+ begin
+ (* If we change the category of characters (symbol vs ident) *)
+ (* we accept this as a possible token cut point and remember the *)
+ (* translated token up to that point *)
+ let translated =
+ if is_symbolchar <> was_symbolchar then
+ match tt.node with
+ | Some tok -> Buffer.clear buff; tok
+ | None -> translated
+ else translated in
+ (* We try to make a significant token from the current *)
+ (* buffer and the new character *)
+ try
+ let tt = ttree_descend tt c in
+ Buffer.add_char buff c;
+ Buffering (is_symbolchar,ctag,translated,tt)
+ with Not_found ->
+ (* No existing translation for the given set of chars *)
+ if is_symbolchar <> was_symbolchar then
+ (* If we changed the category of character read, we accept it *)
+ (* as a possible cut point and restart looking for a translation *)
+ (flush_buffer was_symbolchar tag translated;
+ restart_buffering ())
+ else
+ (* If we did not change the category of character read, we do *)
+ (* not want to cut arbitrarily in the middle of the sequence of *)
+ (* symbol characters or identifier characters *)
+ (Buffer.add_char buff c;
+ Buffering (is_symbolchar,tag,translated,empty_ttree))
+ end
+
+ and restart_buffering () =
+ let tt = try ttree_descend !(!token_tree) c with Not_found -> empty_ttree in
+ Buffer.add_char buff c;
+ Buffering (is_symbolchar,ctag,"",tt)
+
+ in
+ translation_state := aux !translation_state
+
+let output_tagged_ident_string s =
+ for i = 0 to String.length s - 1 do buffer_char false None s.[i] done
+
+let output_tagged_symbol_char tag c =
+ buffer_char true tag c
+
+let flush_sublexer () =
+ match !translation_state with
+ | Neutral -> ()
+ | Buffering (was_symbolchar,tag,translated,tt) ->
+ let translated =
+ match tt.node with
+ | Some tok -> Buffer.clear buff; tok
+ | None -> translated in
+ flush_buffer was_symbolchar tag translated;
+ translation_state := Neutral
+
+(* Translation not using the automaton *)
+let translate s =
+ try (ttree_find !(!token_tree) s).node with Not_found -> None