1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
(************************************************************************)
(* v * The Coq Proof Assistant / The Coq Development Team *)
(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2010 *)
(* \VV/ **************************************************************)
(* // * This file is distributed under the terms of the *)
(* * GNU Lesser General Public License Version 2.1 *)
(************************************************************************)
(* Type of dictionaries *)
type ttree
val empty_ttree : ttree
(* Add a string with some translation in dictionary *)
val ttree_add : ttree -> string -> string -> ttree
(* Remove a translation from a dictionary: returns an equal dictionary
if the word not present *)
val ttree_remove : ttree -> string -> ttree
(* Translate a string *)
val translate : string -> string option
(* Sublexer automaton *)
(* The sublexer buffers the chars it receives; if after some time, it
recognizes that a sequence of chars has a translation in the
current dictionary, it replaces the buffer by the translation *)
(* Received chars can come with a "tag" (usually made from
informations from the globalization file). A sequence of chars can
be considered a word only, if all chars have the same "tag". Rules
for cutting words are the following:
- in a sequence like "**" where * is in the dictionary but not **,
"**" is not translated; otherwise said, to be translated, a sequence
must not be surrounded by other symbol-like chars
- in a sequence like "<>_h*", where <>_h is in the dictionary, the
translation is done because the switch from a letter to a symbol char
is an acceptable cutting point
- in a sequence like "<>_ha", where <>_h is in the dictionary, the
translation is not done because it is considered that h and a are
not separable (however, if h and a have different tags, and h has
the same tags as <, > and _, the translation happens)
- in a sequence like "<>_ha", where <> but not <>_h is in the
dictionary, the translation is done for <> and _ha is considered
independently because the switch from a symbol char to a letter
is considered to be an acceptable cutting point
- the longest-word rule applies: if both <> and <>_h are in the
dictionary, "<>_h" is one word and gets translated
*)
(* Warning: do not output anything on output channel inbetween a call
to [output_tagged_*] and [flush_sublexer]!! *)
type out_function =
bool (* needs escape *) ->
bool (* it is a symbol, not a pure ident *) ->
Index.index_entry option (* the index type of the token if any *) ->
string -> unit
(* This must be initialized before calling the sublexer *)
val token_tree : ttree ref ref
val outfun : out_function ref
(* Process an ident part that might be a symbol part *)
val output_tagged_ident_string : string -> unit
(* Process a non-ident char (possibly equipped with a tag) *)
val output_tagged_symbol_char : Index.index_entry option -> char -> unit
(* Flush the buffered content of the lexer using [outfun] *)
val flush_sublexer : unit -> unit
|