aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib
diff options
context:
space:
mode:
authorGravatar Hugo Herbelin <Hugo.Herbelin@inria.fr>2017-09-12 17:15:06 +0200
committerGravatar Hugo Herbelin <Hugo.Herbelin@inria.fr>2017-09-13 18:59:32 +0200
commit2938fceb50b71d4784d6d718021c505c00196f50 (patch)
tree44068df44063367b6a2554f1234a435aeec5e84b /lib
parent240c8bffaa788669cf3135c95d067cc7b11b5da1 (diff)
Complying more precisely to unicode standard.
In particular, checking that it is at most 4 bytes.
Diffstat (limited to 'lib')
-rw-r--r--lib/unicode.ml8
1 files changed, 2 insertions, 6 deletions
diff --git a/lib/unicode.ml b/lib/unicode.ml
index a7132f62f..8eb2eb45d 100644
--- a/lib/unicode.ml
+++ b/lib/unicode.ml
@@ -301,9 +301,7 @@ let utf8_length s =
| '\192'..'\223' -> nc := 1 (* expect 1 continuation byte *)
| '\224'..'\239' -> nc := 2 (* expect 2 continuation bytes *)
| '\240'..'\247' -> nc := 3 (* expect 3 continuation bytes *)
- | '\248'..'\251' -> nc := 4 (* expect 4 continuation bytes *)
- | '\252'..'\253' -> nc := 5 (* expect 5 continuation bytes *)
- | '\254'..'\255' -> nc := 0 (* invalid byte *)
+ | '\248'..'\255' -> nc := 0 (* invalid byte *)
end ;
incr p ;
while !p < len && !nc > 0 do
@@ -332,9 +330,7 @@ let utf8_sub s start_u len_u =
| '\192'..'\223' -> nc := 1 (* expect 1 continuation byte *)
| '\224'..'\239' -> nc := 2 (* expect 2 continuation bytes *)
| '\240'..'\247' -> nc := 3 (* expect 3 continuation bytes *)
- | '\248'..'\251' -> nc := 4 (* expect 4 continuation bytes *)
- | '\252'..'\253' -> nc := 5 (* expect 5 continuation bytes *)
- | '\254'..'\255' -> nc := 0 (* invalid byte *)
+ | '\248'..'\255' -> nc := 0 (* invalid byte *)
end ;
incr p ;
while !p < len_b && !nc > 0 do