blob: 7e6484e1979d22fdf56290b2039c40d70f17686f (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
(************************************************************************)
(* v * The Coq Proof Assistant / The Coq Development Team *)
(* <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud *)
(* \VV/ **************************************************************)
(* // * This file is distributed under the terms of the *)
(* * GNU Lesser General Public License Version 2.1 *)
(************************************************************************)
(* $Id: utf8_convert.mll 5920 2004-07-16 20:01:26Z herbelin $ *)
{
(* Brings the Lexing functions used below (lexeme, from_string) into scope. *)
open Lexing
(* Module-level buffer in which the lexer rule accumulates its output.
   It is shared by every call, so the code below resets it around each run.
   127 is only the initial size passed to Buffer.create. *)
let b = Buffer.create 127
}
(* Replace all occurrences of \x{iiii} and \x{iiiiiiii} by valid UTF-8 characters *)
(* One hexadecimal digit of an escape.  Only 0-9, A-F and a-f are accepted:
   letters outside the hexadecimal range can never denote a code point, so an
   escape containing them must not be recognised as one.  Such text is then
   handled by the catch-all single-character case of the rule and copied
   unchanged. *)
let digit = ['0'-'9' 'A'-'F' 'a'-'f']
(* Four hexadecimal digits, as in \x{iiii}. *)
let short = digit digit digit digit
(* Eight hexadecimal digits, as in \x{iiiiiiii}. *)
let long = short short
(* [entry lexbuf] scans the whole input and returns a copy of it in which each
   escape \x{hhhh} or \x{hhhhhhhh} has been replaced by the string that
   Glib.Utf8.from_unichar builds for that code, provided Glib.Utf8.validate
   accepts the result; an escape that cannot be converted is kept verbatim.
   The output is accumulated in the shared buffer [b], which is emptied again
   once the result has been extracted at end of input. *)
rule entry = parse
  | "\\x{" (short | long) '}'
      { let escape = lexeme lexbuf in
        let converted =
          try
            (* Skip the three leading characters and the closing brace. *)
            let hex = String.sub escape 3 (String.length escape - 4) in
            Glib.Utf8.from_unichar (int_of_string ("0x" ^ hex))
          with _ ->
            (* Any failure (for instance a number int_of_string rejects)
               leaves the escape as it was written.
               NOTE(review): the handler is catch-all; confirm which
               exceptions Glib.Utf8.from_unichar can raise before narrowing. *)
            escape
        in
        let replacement =
          if Glib.Utf8.validate converted then converted else escape
        in
        Buffer.add_string b replacement;
        entry lexbuf }
  | _
      { (* Any other character is copied unchanged. *)
        Buffer.add_char b (lexeme_char lexbuf 0);
        entry lexbuf }
  | eof
      { (* Hand back the accumulated text and leave the buffer empty. *)
        let result = Buffer.contents b in
        Buffer.reset b;
        result }
{
(* [f s] runs the [entry] rule over the string [s] and returns its result.
   The shared buffer [b] is emptied first so that nothing left over from an
   earlier run can leak into the output. *)
let f s =
  Buffer.reset b;
  entry (from_string s)
}
|