diff options
Diffstat (limited to 'tools/coqdoc/tokens.mli')
-rw-r--r-- | tools/coqdoc/tokens.mli | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/tools/coqdoc/tokens.mli b/tools/coqdoc/tokens.mli new file mode 100644 index 000000000..4e53108ac --- /dev/null +++ b/tools/coqdoc/tokens.mli @@ -0,0 +1,78 @@ +(************************************************************************) +(* v * The Coq Proof Assistant / The Coq Development Team *) +(* <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(************************************************************************) + +(* Type of dictionaries *) + +type ttree + +val empty_ttree : ttree + +(* Add a string with some translation in dictionary *) +val ttree_add : ttree -> string -> string -> ttree + +(* Remove a translation from a dictionary: returns an equal dictionary + if the word not present *) +val ttree_remove : ttree -> string -> ttree + +(* Translate a string *) +val translate : string -> string option + +(* Sublexer automaton *) + +(* The sublexer buffers the chars it receives; if after some time, it + recognizes that a sequence of chars has a translation in the + current dictionary, it replaces the buffer by the translation *) + +(* Received chars can come with a "tag" (usually made from + informations from the globalization file). A sequence of chars can + be considered a word only, if all chars have the same "tag". Rules + for cutting words are the following: + + - in a sequence like "**" where * is in the dictionary but not **, + "**" is not translated; otherwise said, to be translated, a sequence + must not be surrounded by other symbol-like chars + + - in a sequence like "<>_h*", where <>_h is in the dictionary, the + translation is done because the switch from a letter to a symbol char + is an acceptable cutting point + + - in a sequence like "<>_ha", where <>_h is in the dictionary, the + translation is not done because it is considered that h and a are + not separable (however, if h and a have different tags, and h has + the same tags as <, > and _, the translation happens) + + - in a sequence like "<>_ha", where <> but not <>_h is in the + dictionary, the translation is done for <> and _ha is considered + independently because the switch from a symbol char to a letter + is considered to be an acceptable cutting point + + - the longest-word rule applies: if both <> and <>_h are in the + dictionary, "<>_h" is one word and gets translated +*) + +(* Warning: do not output anything on output channel inbetween a call + to [output_tagged_*] and [flush_sublexer]!! *) + +type out_function = + bool (* needs escape *) -> + bool (* it is a symbol, not a pure ident *) -> + Index.index_entry option (* the index type of the token if any *) -> + string -> unit + +(* This must be initialized before calling the sublexer *) +val token_tree : ttree ref +val outfun : out_function ref + +(* Process an ident part that might be a symbol part *) +val output_tagged_ident_string : string -> unit + +(* Process a non-ident char (possibly equipped with a tag) *) +val output_tagged_symbol_char : Index.index_entry option -> char -> unit + +(* Flush the buffered content of the lexer using [outfun] *) +val flush_sublexer : unit -> unit |