From 5b7eafd0f00a16d78f99a27f5c7d5a0de77dc7e6 Mon Sep 17 00:00:00 2001 From: Stephane Glondu Date: Wed, 21 Jul 2010 09:46:51 +0200 Subject: Imported Upstream snapshot 8.3~beta0+13298 --- tools/coqdoc/tokens.ml | 171 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 tools/coqdoc/tokens.ml (limited to 'tools/coqdoc/tokens.ml') diff --git a/tools/coqdoc/tokens.ml b/tools/coqdoc/tokens.ml new file mode 100644 index 00000000..c2a47308 --- /dev/null +++ b/tools/coqdoc/tokens.ml @@ -0,0 +1,171 @@ +(************************************************************************) +(* v * The Coq Proof Assistant / The Coq Development Team *) +(* None with + | Some tt' -> + CharMap.add c (insert tt' (i + 1)) (CharMap.remove c tt.branch) + | None -> + let tt' = {node = None; branch = CharMap.empty} in + CharMap.add c (insert tt' (i + 1)) tt.branch + in + { node = tt.node; branch = br } + in + insert ttree 0 + +(* Removes a string from a dictionary: returns an equal dictionary + if the word not present. *) +let ttree_remove ttree str = + let rec remove tt i = + if i == String.length str then + {node = None; branch = tt.branch} + else + let c = str.[i] in + let br = + match try Some (CharMap.find c tt.branch) with Not_found -> None with + | Some tt' -> + CharMap.add c (remove tt' (i + 1)) (CharMap.remove c tt.branch) + | None -> tt.branch + in + { node = tt.node; branch = br } + in + remove ttree 0 + +let ttree_descend ttree c = CharMap.find c ttree.branch + +let ttree_find ttree str = + let rec proc_rec tt i = + if i == String.length str then tt + else proc_rec (CharMap.find str.[i] tt.branch) (i+1) + in + proc_rec ttree 0 + +(*s Parameters of the translation automaton *) + +type out_function = bool -> bool -> Index.index_entry option -> string -> unit + +let token_tree = ref (ref empty_ttree) +let outfun = ref (fun _ _ _ _ -> failwith "outfun not initialized") + +(*s Translation automaton *) + +let buff = Buffer.create 4 + +let flush_buffer was_symbolchar tag tok = + let hastr = String.length tok <> 0 in + if hastr then !outfun false was_symbolchar tag tok; + if Buffer.length buff <> 0 then + !outfun true (if hastr then not was_symbolchar else was_symbolchar) + tag (Buffer.contents buff); + Buffer.clear buff + +type sublexer_state = + | Neutral + | Buffering of bool * Index.index_entry option * string * ttree + +let translation_state = ref Neutral + +let buffer_char is_symbolchar ctag c = + let rec aux = function + | Neutral -> + restart_buffering () + | Buffering (was_symbolchar,tag,translated,tt) -> + if tag <> ctag then + (* A strong tag comes from Coq; if different Coq tags *) + (* hence, we don't try to see the chars as part of a single token *) + let translated = + match tt.node with + | Some tok -> Buffer.clear buff; tok + | None -> translated in + flush_buffer was_symbolchar tag translated; + restart_buffering () + else + begin + (* If we change the category of characters (symbol vs ident) *) + (* we accept this as a possible token cut point and remember the *) + (* translated token up to that point *) + let translated = + if is_symbolchar <> was_symbolchar then + match tt.node with + | Some tok -> Buffer.clear buff; tok + | None -> translated + else translated in + (* We try to make a significant token from the current *) + (* buffer and the new character *) + try + let tt = ttree_descend tt c in + Buffer.add_char buff c; + Buffering (is_symbolchar,ctag,translated,tt) + with Not_found -> + (* No existing translation for the given set of chars *) + if is_symbolchar <> was_symbolchar then + (* If we changed the category of character read, we accept it *) + (* as a possible cut point and restart looking for a translation *) + (flush_buffer was_symbolchar tag translated; + restart_buffering ()) + else + (* If we did not change the category of character read, we do *) + (* not want to cut arbitrarily in the middle of the sequence of *) + (* symbol characters or identifier characters *) + (Buffer.add_char buff c; + Buffering (is_symbolchar,tag,translated,empty_ttree)) + end + + and restart_buffering () = + let tt = try ttree_descend !(!token_tree) c with Not_found -> empty_ttree in + Buffer.add_char buff c; + Buffering (is_symbolchar,ctag,"",tt) + + in + translation_state := aux !translation_state + +let output_tagged_ident_string s = + for i = 0 to String.length s - 1 do buffer_char false None s.[i] done + +let output_tagged_symbol_char tag c = + buffer_char true tag c + +let flush_sublexer () = + match !translation_state with + | Neutral -> () + | Buffering (was_symbolchar,tag,translated,tt) -> + let translated = + match tt.node with + | Some tok -> Buffer.clear buff; tok + | None -> translated in + flush_buffer was_symbolchar tag translated; + translation_state := Neutral + +(* Translation not using the automaton *) +let translate s = + try (ttree_find !(!token_tree) s).node with Not_found -> None -- cgit v1.2.3