diff options
author | Adam Chlipala <adam@chlipala.net> | 2019-09-25 19:54:59 -0400 |
---|---|---|
committer | Adam Chlipala <adam@chlipala.net> | 2019-09-25 19:54:59 -0400 |
commit | 8728f397bee2b567611dcd7a7c359c7e92159c1c (patch) | |
tree | 283c2b659f5b9a4ed6ef097ba4b0f2052d966c28 | |
parent | c388a91762e9dd9aef2eb097963af020c0e452f8 (diff) |
Unicode escapes in JSON
-rw-r--r-- | include/urweb/urweb_cpp.h | 1 | ||||
-rw-r--r-- | lib/ur/basis.urs | 1 | ||||
-rw-r--r-- | lib/ur/json.ur | 29 | ||||
-rw-r--r-- | src/c/urweb.c | 12 |
4 files changed, 43 insertions, 0 deletions
diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h index dcf67fef..e4ad6e61 100644 --- a/include/urweb/urweb_cpp.h +++ b/include/urweb/urweb_cpp.h @@ -166,6 +166,7 @@ uw_Basis_string uw_Basis_strchr(struct uw_context *, const char *, uw_Basis_char uw_Basis_int uw_Basis_strcspn(struct uw_context *, const char *, const char *); uw_Basis_string uw_Basis_substring(struct uw_context *, const char *, uw_Basis_int, uw_Basis_int); uw_Basis_string uw_Basis_str1(struct uw_context *, uw_Basis_char); +uw_Basis_string uw_Basis_ofUnicode(struct uw_context *, uw_Basis_int); uw_Basis_string uw_strdup(struct uw_context *, const char *); uw_Basis_string uw_maybe_strdup(struct uw_context *, const char *); diff --git a/lib/ur/basis.urs b/lib/ur/basis.urs index 2a98bf6f..d29bf6e6 100644 --- a/lib/ur/basis.urs +++ b/lib/ur/basis.urs @@ -95,6 +95,7 @@ val strsindex : string -> string -> option int val strcspn : string -> string -> int val substring : string -> int -> int -> string val str1 : char -> string +val ofUnicode : int -> string class show val show : t ::: Type -> show t -> t -> string diff --git a/lib/ur/json.ur b/lib/ur/json.ur index 05406739..70f0c797 100644 --- a/lib/ur/json.ur +++ b/lib/ur/json.ur @@ -59,6 +59,17 @@ fun escape s = "\"" ^ esc s end +fun unhex ch = + if Char.isDigit ch then + Char.toInt ch - Char.toInt #"0" + else if Char.isXdigit ch then + if Char.isUpper ch then + 10 + (Char.toInt ch - Char.toInt #"A") + else + 10 + (Char.toInt ch - Char.toInt #"a") + else + error <xml>Invalid hexadecimal digit "{[ch]}"</xml> + fun unescape s = let val len = String.length s @@ -75,6 +86,11 @@ fun unescape s = | #"\\" => if i+1 >= len then error <xml>JSON unescape: Bad escape sequence: {[s]}</xml> + else if String.sub s (i + 1) = #"u" then + if i+5 >= len then + error <xml>JSON unescape: Bad escape sequence: {[s]}</xml> + else + findEnd (i+6) else findEnd (i+2) | _ => findEnd (i+1) @@ -93,6 +109,19 @@ fun unescape s = #"\\" => if i+1 >= len then error <xml>JSON unescape: Bad escape sequence: {[s]}</xml> + else if String.sub s (i+1) = #"u" then + if i+5 >= len then + error <xml>JSON unescape: Unicode ends early</xml> + else + let + val n = + unhex (String.sub s (i+2)) * (256*16) + + unhex (String.sub s (i+3)) * 256 + + unhex (String.sub s (i+4)) * 16 + + unhex (String.sub s (i+5)) + in + ofUnicode n ^ unesc (i+6) + end else (case String.sub s (i+1) of #"n" => "\n" diff --git a/src/c/urweb.c b/src/c/urweb.c index af929269..8c445f39 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -2724,6 +2724,18 @@ uw_Basis_string uw_Basis_str1(uw_context ctx, uw_Basis_char ch) { return r; } +uw_Basis_string uw_Basis_ofUnicode(uw_context ctx, uw_Basis_int n) { + UChar buf16[] = {n}; + uw_Basis_string out = uw_malloc(ctx, 3); + int32_t outLen; + UErrorCode pErrorCode = 0; + + if (u_strToUTF8(out, 3, &outLen, buf16, 1, &pErrorCode) == NULL || outLen == 0) + uw_error(ctx, FATAL, "Bad Unicode string to unescape (error %s)", u_errorName(pErrorCode)); + + return out; +} + uw_Basis_string uw_strdup(uw_context ctx, uw_Basis_string s1) { int len = strlen(s1) + 1; char *s; |