diff options
author | Hugo Herbelin <Hugo.Herbelin@inria.fr> | 2017-09-12 17:15:06 +0200 |
---|---|---|
committer | Hugo Herbelin <Hugo.Herbelin@inria.fr> | 2017-09-13 18:59:32 +0200 |
commit | 2938fceb50b71d4784d6d718021c505c00196f50 (patch) | |
tree | 44068df44063367b6a2554f1234a435aeec5e84b /lib | |
parent | 240c8bffaa788669cf3135c95d067cc7b11b5da1 (diff) |
Complying more precisely with the Unicode standard.
In particular, checking that a UTF-8 encoded sequence is at most 4 bytes long.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/unicode.ml | 8 |
1 files changed, 2 insertions, 6 deletions
diff --git a/lib/unicode.ml b/lib/unicode.ml index a7132f62f..8eb2eb45d 100644 --- a/lib/unicode.ml +++ b/lib/unicode.ml @@ -301,9 +301,7 @@ let utf8_length s = | '\192'..'\223' -> nc := 1 (* expect 1 continuation byte *) | '\224'..'\239' -> nc := 2 (* expect 2 continuation bytes *) | '\240'..'\247' -> nc := 3 (* expect 3 continuation bytes *) - | '\248'..'\251' -> nc := 4 (* expect 4 continuation bytes *) - | '\252'..'\253' -> nc := 5 (* expect 5 continuation bytes *) - | '\254'..'\255' -> nc := 0 (* invalid byte *) + | '\248'..'\255' -> nc := 0 (* invalid byte *) end ; incr p ; while !p < len && !nc > 0 do @@ -332,9 +330,7 @@ let utf8_sub s start_u len_u = | '\192'..'\223' -> nc := 1 (* expect 1 continuation byte *) | '\224'..'\239' -> nc := 2 (* expect 2 continuation bytes *) | '\240'..'\247' -> nc := 3 (* expect 3 continuation bytes *) - | '\248'..'\251' -> nc := 4 (* expect 4 continuation bytes *) - | '\252'..'\253' -> nc := 5 (* expect 5 continuation bytes *) - | '\254'..'\255' -> nc := 0 (* invalid byte *) + | '\248'..'\255' -> nc := 0 (* invalid byte *) end ; incr p ; while !p < len_b && !nc > 0 do |