From 8728f397bee2b567611dcd7a7c359c7e92159c1c Mon Sep 17 00:00:00 2001 From: Adam Chlipala Date: Wed, 25 Sep 2019 19:54:59 -0400 Subject: Unicode escapes in JSON --- lib/ur/basis.urs | 1 + lib/ur/json.ur | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'lib') diff --git a/lib/ur/basis.urs b/lib/ur/basis.urs index 2a98bf6f..d29bf6e6 100644 --- a/lib/ur/basis.urs +++ b/lib/ur/basis.urs @@ -95,6 +95,7 @@ val strsindex : string -> string -> option int val strcspn : string -> string -> int val substring : string -> int -> int -> string val str1 : char -> string +val ofUnicode : int -> string class show val show : t ::: Type -> show t -> t -> string diff --git a/lib/ur/json.ur b/lib/ur/json.ur index 05406739..70f0c797 100644 --- a/lib/ur/json.ur +++ b/lib/ur/json.ur @@ -59,6 +59,17 @@ fun escape s = "\"" ^ esc s end +fun unhex ch = + if Char.isDigit ch then + Char.toInt ch - Char.toInt #"0" + else if Char.isXdigit ch then + if Char.isUpper ch then + 10 + (Char.toInt ch - Char.toInt #"A") + else + 10 + (Char.toInt ch - Char.toInt #"a") + else + error <xml>Invalid hexadecimal digit "{[ch]}"</xml> + fun unescape s = let val len = String.length s @@ -75,6 +86,11 @@ fun unescape s = | #"\\" => if i+1 >= len then error <xml>JSON unescape: Bad escape sequence: {[s]}</xml> + else if String.sub s (i + 1) = #"u" then + if i+5 >= len then + error <xml>JSON unescape: Bad escape sequence: {[s]}</xml> + else + findEnd (i+6) else findEnd (i+2) | _ => findEnd (i+1) @@ -93,6 +109,19 @@ fun unescape s = #"\\" => if i+1 >= len then error <xml>JSON unescape: Bad escape sequence: {[s]}</xml> + else if String.sub s (i+1) = #"u" then + if i+5 >= len then + error <xml>JSON unescape: Unicode ends early</xml> + else + let + val n = + unhex (String.sub s (i+2)) * (256*16) + + unhex (String.sub s (i+3)) * 256 + + unhex (String.sub s (i+4)) * 16 + + unhex (String.sub s (i+5)) + in + ofUnicode n ^ unesc (i+6) + end else (case String.sub s (i+1) of #"n" => "\n" -- cgit v1.2.3