aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--interp/notation.ml6
-rw-r--r--lib/unicode.ml7
-rw-r--r--lib/unicode.mli3
3 files changed, 14 insertions, 2 deletions
diff --git a/interp/notation.ml b/interp/notation.ml
index 5c10e0af7..c4addbf10 100644
--- a/interp/notation.ml
+++ b/interp/notation.ml
@@ -314,7 +314,9 @@ let declare_prim_token_interpreter sc interp (patl,uninterp,b) =
patl
let mkNumeral n = Numeral n
-let mkString s = String s
+let mkString = function
+| None -> None
+| Some s -> if Unicode.is_utf8 s then Some (String s) else None
let delay dir int loc x = (dir, (fun () -> int loc x))
@@ -326,7 +328,7 @@ let declare_numeral_interpreter sc dir interp (patl,uninterp,inpat) =
let declare_string_interpreter sc dir interp (patl,uninterp,inpat) =
declare_prim_token_interpreter sc
(fun cont loc -> function String s -> delay dir interp loc s | p -> cont loc p)
- (patl, (fun r -> Option.map mkString (uninterp r)), inpat)
+ (patl, (fun r -> mkString (uninterp r)), inpat)
let check_required_module loc sc (sp,d) =
try let _ = Nametab.global_of_path sp in ()
diff --git a/lib/unicode.ml b/lib/unicode.ml
index 1765e93dc..cfaa73cc1 100644
--- a/lib/unicode.ml
+++ b/lib/unicode.ml
@@ -168,6 +168,13 @@ let next_utf8 s i =
(c land 0x3F) lsl 6 + (d land 0x3F)
else err ()
+let is_utf8 s =
+ let rec check i =
+ let (off, _) = next_utf8 s i in
+ check (i + off)
+ in
+ try check 0 with End_of_input -> true | Invalid_argument _ -> false
+
(* Check the well-formedness of an identifier *)
let initial_refutation j n s =
diff --git a/lib/unicode.mli b/lib/unicode.mli
index 520203d43..65e75a20d 100644
--- a/lib/unicode.mli
+++ b/lib/unicode.mli
@@ -26,3 +26,6 @@ val lowercase_first_char : string -> string
(** For extraction, turn a unicode string into an ascii-only one *)
val is_basic_ascii : string -> bool
val ascii_of_ident : string -> string
+
+(** Validate an UTF-8 string *)
+val is_utf8 : string -> bool