From eb86dffeeec897d17905f3adff84e6acfd018330 Mon Sep 17 00:00:00 2001 From: Denis Redozubov Date: Wed, 22 Aug 2018 15:11:32 +0300 Subject: Rough same page anchors --- include/urweb/urweb_cpp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/urweb/urweb_cpp.h') diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h index 5f1144b8..1351cfbc 100644 --- a/include/urweb/urweb_cpp.h +++ b/include/urweb/urweb_cpp.h @@ -242,6 +242,7 @@ uw_Basis_string uw_Basis_blessEnvVar(struct uw_context *, uw_Basis_string); uw_Basis_string uw_Basis_blessMeta(struct uw_context *, uw_Basis_string); uw_Basis_string uw_Basis_checkUrl(struct uw_context *, uw_Basis_string); +uw_Basis_string uw_Basis_anchorUrl(struct uw_context *, uw_Basis_string); uw_Basis_string uw_Basis_checkMime(struct uw_context *, uw_Basis_string); uw_Basis_string uw_Basis_checkRequestHeader(struct uw_context *, uw_Basis_string); uw_Basis_string uw_Basis_checkResponseHeader(struct uw_context *, uw_Basis_string); -- cgit v1.2.3 From 5cc729b48aad084757a049b7e5cdbadae5e9e400 Mon Sep 17 00:00:00 2001 From: fab Date: Fri, 30 Nov 2018 23:29:14 +0000 Subject: reject invalid codepoints. Basis.iscodepoint. fix german char in js --- include/urweb/urweb_cpp.h | 5 +- lib/js/urweb.js | 7 +- lib/ur/basis.urs | 2 + src/c/urweb.c | 265 +++++++++++++++++++++++++++------------------- 4 files changed, 168 insertions(+), 111 deletions(-) (limited to 'include/urweb/urweb_cpp.h') diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h index 5f1144b8..25f97fb3 100644 --- a/include/urweb/urweb_cpp.h +++ b/include/urweb/urweb_cpp.h @@ -103,7 +103,7 @@ char *uw_Basis_htmlifyFloat(struct uw_context *, uw_Basis_float); char *uw_Basis_htmlifyString(struct uw_context *, uw_Basis_string); char *uw_Basis_htmlifyBool(struct uw_context *, uw_Basis_bool); char *uw_Basis_htmlifyTime(struct uw_context *, uw_Basis_time); -char *uw_Basis_htmlifySpecialChar(struct uw_context *, unsigned char); +char *uw_Basis_htmlifySpecialChar(struct uw_context *, uw_Basis_char); char *uw_Basis_htmlifySource(struct uw_context *, uw_Basis_source); uw_unit uw_Basis_htmlifyInt_w(struct uw_context *, uw_Basis_int); @@ -111,7 +111,7 @@ uw_unit uw_Basis_htmlifyFloat_w(struct uw_context *, uw_Basis_float); uw_unit uw_Basis_htmlifyString_w(struct uw_context *, uw_Basis_string); uw_unit uw_Basis_htmlifyBool_w(struct uw_context *, uw_Basis_bool); uw_unit uw_Basis_htmlifyTime_w(struct uw_context *, uw_Basis_time); -uw_unit uw_Basis_htmlifySpecialChar_w(struct uw_context *, unsigned char); +uw_unit uw_Basis_htmlifySpecialChar_w(struct uw_context *, uw_Basis_char); uw_unit uw_Basis_htmlifySource_w(struct uw_context *, uw_Basis_source); char *uw_Basis_attrifyInt(struct uw_context *, uw_Basis_int); @@ -327,6 +327,7 @@ uw_Basis_bool uw_Basis_isxdigit(struct uw_context *, uw_Basis_char); uw_Basis_char uw_Basis_tolower(struct uw_context *, uw_Basis_char); uw_Basis_char uw_Basis_toupper(struct uw_context *, uw_Basis_char); +uw_Basis_bool uw_Basis_iscodepoint(struct uw_context *, uw_Basis_int); uw_Basis_int uw_Basis_ord(struct uw_context *, uw_Basis_char); uw_Basis_char uw_Basis_chr(struct uw_context *, uw_Basis_int); diff --git a/lib/js/urweb.js b/lib/js/urweb.js index de1a2ad0..c7725e28 100644 --- a/lib/js/urweb.js +++ b/lib/js/urweb.js @@ -38,7 +38,12 @@ function isXdigit(c) { return isDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' function ord(c) { return c.charCodeAt(0); } function isPrint(c) { return ord(c) > 31 && ord(c) != 127; } function toLower(c) { return c.toLowerCase(); } -function toUpper(c) { return c.toUpperCase(); } +function toUpper(c) { + if (ord(c) == 223) + return c; + else + return c.toUpperCase(); +} // Lists diff --git a/lib/ur/basis.urs b/lib/ur/basis.urs index 878f2793..c9d6556b 100644 --- a/lib/ur/basis.urs +++ b/lib/ur/basis.urs @@ -79,6 +79,8 @@ val toupper : char -> char val ord : char -> int val chr : int -> char +val iscodepoint : int -> bool + (** String operations *) val strlen : string -> int diff --git a/src/c/urweb.c b/src/c/urweb.c index be65afcc..195ddada 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -1559,101 +1559,89 @@ const char *uw_Basis_get_settings(uw_context ctx, uw_unit u) { } } -uw_Basis_string uw_Basis_jsifyString(uw_context ctx, uw_Basis_string s) { - char *r, *s2; - - uw_check_heap(ctx, strlen(s) * 4 + 3); - - r = s2 = ctx->heap.front; - *s2++ = '"'; - - for (; *s; s++) { - unsigned char c = *s; - - switch (c) { - case '"': - strcpy(s2, "\\\""); - s2 += 2; - break; - case '\'': - strcpy(s2, "\\047"); - s2 += 4; - break; - case '\\': - strcpy(s2, "\\\\"); - s2 += 2; - break; - case '<': - strcpy(s2, "\\074"); - s2 += 4; - break; - case '&': - strcpy(s2, "\\046"); - s2 += 4; - break; - default: - if (isprint((int)c) || c >= 128) - *s2++ = c; - else { - sprintf(s2, "\\%03o", c); - s2 += 4; - } - } - } - - strcpy(s2, "\""); - ctx->heap.front = s2 + 2; - return r; -} - uw_Basis_bool uw_Basis_isprint(uw_context ctx, uw_Basis_char ch); - -uw_Basis_string uw_Basis_jsifyChar(uw_context ctx, uw_Basis_char c1) { - char *r, *s2; - - uw_check_heap(ctx, 7); - - r = s2 = ctx->heap.front; - *s2++ = '"'; - +void jsifyChar(char**buffer_ptr, uw_context ctx, uw_Basis_char c1) { + char* buffer = *buffer_ptr; + switch (c1) { case '"': - strcpy(s2, "\\\""); - s2 += 2; + strcpy(buffer, "\\\""); + buffer += 2; break; case '\'': - strcpy(s2, "\\047"); - s2 += 4; + strcpy(buffer, "\\047"); + buffer += 4; break; case '\\': - strcpy(s2, "\\\\"); - s2 += 2; + strcpy(buffer, "\\\\"); + buffer += 2; break; case '<': - strcpy(s2, "\\074"); - s2 += 4; + strcpy(buffer, "\\074"); + buffer += 4; break; case '&': - strcpy(s2, "\\046"); - s2 += 4; + strcpy(buffer, "\\046"); + buffer += 4; break; default: if (uw_Basis_isprint(ctx, c1) == uw_Basis_True) { int offset = 0; - U8_APPEND_UNSAFE(s2, offset, c1); - s2 += offset; + U8_APPEND_UNSAFE(buffer, offset, c1); + buffer += offset; } else { - assert(0777 >= c1); - sprintf(s2, "\\%03o", (unsigned char)c1); - s2 += 4; + assert(65536 > c1); + sprintf(buffer, "\\u%04x", (unsigned char)c1); + buffer += 6; } } + + *buffer_ptr = buffer; +} + +uw_Basis_string uw_Basis_jsifyString(uw_context ctx, uw_Basis_string s) { + char *r, *s2; + uw_Basis_char c; + + uw_check_heap(ctx, strlen(s) * 6 + 3); + + r = s2 = ctx->heap.front; + *s2++ = '"'; + + int offset = 0; + while(s[offset] != 0) + { + U8_NEXT(s, offset, -1, c); + + jsifyChar(&s2, ctx, c); + } + strcpy(s2, "\""); ctx->heap.front = s2 + 2; + + return r; +} + +uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c); + +uw_Basis_string uw_Basis_jsifyChar(uw_context ctx, uw_Basis_char c1) { + char *r, *s2; + + uw_check_heap(ctx, 8); + + r = s2 = ctx->heap.front; + + *s2++ = '"'; + + jsifyChar(&s2, ctx, c1); + + strcpy(s2, "\""); + ctx->heap.front = s2 + 2; + return r; } @@ -1697,6 +1685,7 @@ uw_Basis_string uw_Basis_jsifyString_ws(uw_context ctx, uw_Basis_string s) { strcpy(s2, "\""); ctx->script.front = s2 + 1; + return r; } @@ -2262,25 +2251,27 @@ uw_unit uw_Basis_htmlifyInt_w(uw_context ctx, uw_Basis_int n) { return uw_unit_v; } -char *uw_Basis_htmlifySpecialChar(uw_context ctx, unsigned char ch) { +char *uw_Basis_htmlifySpecialChar(uw_context ctx, uw_Basis_char ch) { unsigned int n = ch; int len; char *r; - uw_check_heap(ctx, INTS_MAX+3); + uw_check_heap(ctx, INTS_MAX+3 + 1); r = ctx->heap.front; - sprintf(r, "&#%u;%n", n, &len); + len = sprintf(r, "&#%u;", n); ctx->heap.front += len+1; + return r; } -uw_unit uw_Basis_htmlifySpecialChar_w(uw_context ctx, unsigned char ch) { +uw_unit uw_Basis_htmlifySpecialChar_w(uw_context ctx, uw_Basis_char ch) { unsigned int n = ch; int len; uw_check(ctx, INTS_MAX+3); - sprintf(ctx->page.front, "&#%u;%n", n, &len); + len = sprintf(ctx->page.front, "&#%u;", n); ctx->page.front += len; + return uw_unit_v; } @@ -2328,48 +2319,69 @@ uw_unit uw_Basis_jsifyInt_w(uw_context ctx, uw_Basis_int n) { char *uw_Basis_htmlifyString(uw_context ctx, const char *s) { char *r, *s2; + uw_Basis_char c1; + int offset = 0, len = 0; + + uw_check_heap(ctx, strlen(s) * (INTS_MAX + 3) + 1); - uw_check_heap(ctx, strlen(s) * 5 + 1); - - for (r = s2 = ctx->heap.front; *s; s++) { - unsigned char c = *s; - - switch (c) { - case '<': - strcpy(s2, "<"); - s2 += 4; - break; - case '&': - strcpy(s2, "&"); - s2 += 5; - break; - default: - *s2++ = c; + r = s2 = ctx->heap.front; + + while (s[offset] != 0) { + + U8_NEXT(s, offset, -1, c1); + + + if (U8_IS_SINGLE(c1) && uw_Basis_isprint(ctx, c1)) { + switch (c1) { + case '<': + strcpy(s2, "<"); + s2 += 4; + break; + case '&': + strcpy(s2, "&"); + s2 += 5; + break; + default: + *s2++ = c1; + } + } else { + len = sprintf(s2, "&#%u;", c1); + s2 += len; } } - + *s2++ = 0; ctx->heap.front = s2; + return r; } uw_unit uw_Basis_htmlifyString_w(uw_context ctx, uw_Basis_string s) { uw_check(ctx, strlen(s) * 6); - - for (; *s; s++) { - unsigned char c = *s; - - switch (c) { - case '<': - uw_write_unsafe(ctx, "<"); - break; - case '&': - uw_write_unsafe(ctx, "&"); - break; - default: - uw_writec_unsafe(ctx, c); + int offset = 0; + uw_Basis_char c1; + + while(s[offset] != 0){ + + U8_NEXT(s, offset, -1, c1); + + if (U8_IS_SINGLE(c1) && uw_Basis_isprint(ctx, c1)) { + + switch (c1) { + case '<': + uw_write_unsafe(ctx, "<"); + break; + case '&': + uw_write_unsafe(ctx, "&"); + break; + default: + uw_writec_unsafe(ctx, c1); + } } - } + else { + uw_Basis_htmlifySpecialChar_w(ctx, c1); + } + } return uw_unit_v; } @@ -4474,9 +4486,46 @@ uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c) { return (uw_Basis_int)c; } +uw_Basis_bool uw_Basis_iscodepoint (uw_context ctx, uw_Basis_int n) { + (void)ctx; + uw_Basis_char ch = (uw_Basis_char)n; + + if (UCHAR_MIN_VALUE <= ch && UCHAR_MAX_VALUE > ch) { + + if (U8_LENGTH(ch) == 0) { + return uw_Basis_False; + } + + if (u_charType(ch) == U_UNASSIGNED) { + return uw_Basis_False; + } + + } else { + return uw_Basis_False; + } + + return uw_Basis_True; +} + uw_Basis_char uw_Basis_chr(uw_context ctx, uw_Basis_int n) { (void)ctx; - return (uw_Basis_char)n; + uw_Basis_char ch = (uw_Basis_char)n; + + if (UCHAR_MIN_VALUE <= ch && UCHAR_MAX_VALUE > ch) { + + if (U8_LENGTH(ch) == 0) { + uw_error(ctx, FATAL, "The integer %lld cannot be converted to a char", n); + } + + if (u_charType(ch) == U_UNASSIGNED) { + uw_error(ctx, FATAL, "The integer %lld is not a valid char codepoint", n); + } + + } else { + uw_error(ctx, FATAL, "Integer %lld out of range of unicode chars", n); + } + + return ch; } uw_Basis_string uw_Basis_currentUrl(uw_context ctx) { -- cgit v1.2.3 From 28d130c8c3c2ef9cd229d09afe14fbcbcb954223 Mon Sep 17 00:00:00 2001 From: fab Date: Wed, 9 Jan 2019 22:34:53 +0000 Subject: urlifyChar needs to be added to .h file as well --- include/urweb/urweb_cpp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/urweb/urweb_cpp.h') diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h index 25f97fb3..25f26e1b 100644 --- a/include/urweb/urweb_cpp.h +++ b/include/urweb/urweb_cpp.h @@ -138,6 +138,7 @@ char *uw_Basis_urlifySource(struct uw_context *, uw_Basis_source); uw_unit uw_Basis_urlifyInt_w(struct uw_context *, uw_Basis_int); uw_unit uw_Basis_urlifyFloat_w(struct uw_context *, uw_Basis_float); +uw_unit uw_Basis_urlifyChar_w(struct uw_context *, uw_Basis_char); uw_unit uw_Basis_urlifyString_w(struct uw_context *, uw_Basis_string); uw_unit uw_Basis_urlifyBool_w(struct uw_context *, uw_Basis_bool); uw_unit uw_Basis_urlifyTime_w(struct uw_context *, uw_Basis_time); -- cgit v1.2.3 From 87d2eab53f8e9f81cc459429675123c9ff36f41e Mon Sep 17 00:00:00 2001 From: Adam Chlipala Date: Mon, 21 Jan 2019 18:09:59 -0500 Subject: Basis.textOfBlob; try creating filecache directory if it doesn't exist --- include/urweb/urweb_cpp.h | 1 + lib/ur/basis.urs | 2 ++ src/c/urweb.c | 16 +++++++++++++++- src/cjr_print.sml | 24 +++++++++++++++++++++++- 4 files changed, 41 insertions(+), 2 deletions(-) (limited to 'include/urweb/urweb_cpp.h') diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h index 25f97fb3..67312015 100644 --- a/include/urweb/urweb_cpp.h +++ b/include/urweb/urweb_cpp.h @@ -262,6 +262,7 @@ uw_Basis_string uw_Basis_fileMimeType(struct uw_context *, uw_Basis_file); uw_Basis_blob uw_Basis_fileData(struct uw_context *, uw_Basis_file); uw_Basis_int uw_Basis_blobSize(struct uw_context *, uw_Basis_blob); uw_Basis_blob uw_Basis_textBlob(struct uw_context *, uw_Basis_string); +uw_Basis_string uw_Basis_textOfBlob(struct uw_context *, uw_Basis_blob); uw_Basis_string uw_Basis_postType(struct uw_context *, uw_Basis_postBody); uw_Basis_string uw_Basis_postData(struct uw_context *, uw_Basis_postBody); diff --git a/lib/ur/basis.urs b/lib/ur/basis.urs index c893e65d..be13c684 100644 --- a/lib/ur/basis.urs +++ b/lib/ur/basis.urs @@ -1019,6 +1019,8 @@ val checkMime : string -> option mimeType val returnBlob : t ::: Type -> blob -> mimeType -> transaction t val blobSize : blob -> int val textBlob : string -> blob +val textOfBlob : blob -> option string +(* Returns [Some] exactly when the blob contains no zero bytes. *) type postBody val postType : postBody -> string diff --git a/src/c/urweb.c b/src/c/urweb.c index ae2fc0a8..c8cfb0c6 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -4075,6 +4075,20 @@ uw_Basis_blob uw_Basis_textBlob(uw_context ctx, uw_Basis_string s) { return b; } +uw_Basis_string uw_Basis_textOfBlob(uw_context ctx, uw_Basis_blob b) { + size_t i; + uw_Basis_string r; + + for (i = 0; i < b.size; ++i) + if (b.data[i] == 0) + return NULL; + + r = uw_malloc(ctx, b.size + 1); + memcpy(r, b.data, b.size); + r[b.size] = 0; + return r; +} + uw_Basis_blob uw_Basis_fileData(uw_context ctx, uw_Basis_file f) { (void)ctx; return f.data; @@ -5207,7 +5221,7 @@ uw_unit uw_Basis_cache_file(uw_context ctx, uw_Basis_blob contents) { fd = mkstemp(tempfile); if (fd < 0) - uw_error(ctx, FATAL, "Error creating temporary file for cache"); + uw_error(ctx, FATAL, "Error creating temporary file %s for cache", tempfile); while (written_so_far < contents.size) { ssize_t written_just_now = write(fd, contents.data + written_so_far, contents.size - written_so_far); diff --git a/src/cjr_print.sml b/src/cjr_print.sml index 31653a74..09cd9c7f 100644 --- a/src/cjr_print.sml +++ b/src/cjr_print.sml @@ -3391,6 +3391,14 @@ fun p_file env (ds, ps) = newline, string "#include ", newline, + (case Settings.getFileCache () of + NONE => box [] + | SOME _ => box [string "#include ", + newline, + string "#include ", + newline, + string "#include ", + newline]), if hasDb then box [string ("#include <" ^ #header (Settings.currentDbms ()) ^ ">"), newline] @@ -3655,7 +3663,21 @@ fun p_file env (ds, ps) = newline, string "static void uw_initializer(uw_context ctx) {", newline, - box [string "uw_begin_initializing(ctx);", + box [(case Settings.getFileCache () of + NONE => box [] + | SOME dir => box [newline, + string "struct stat st = {0};", + newline, + newline, + string "if (stat(\"", + string (Prim.toCString dir), + string "\", &st) == -1)", + newline, + box [string "mkdir(\"", + string (Prim.toCString dir), + string "\", 0700);", + newline]]), + string "uw_begin_initializing(ctx);", newline, p_list_sep newline (fn x => x) (rev (!global_initializers)), string "uw_end_initializing(ctx);", -- cgit v1.2.3 From 3f119f5c0a5f210ed442841dfed3ae98786004e9 Mon Sep 17 00:00:00 2001 From: Adam Chlipala Date: Sat, 23 Mar 2019 20:16:15 -0400 Subject: Supporting 'char' arguments to handlers called from client code --- include/urweb/urweb_cpp.h | 1 + src/c/urweb.c | 17 +++++++++++++++++ src/settings.sml | 2 ++ 3 files changed, 20 insertions(+) (limited to 'include/urweb/urweb_cpp.h') diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h index 18b5f583..dcf67fef 100644 --- a/include/urweb/urweb_cpp.h +++ b/include/urweb/urweb_cpp.h @@ -149,6 +149,7 @@ uw_Basis_unit uw_Basis_unurlifyUnit(struct uw_context * ctx, char **s); uw_Basis_int uw_Basis_unurlifyInt(struct uw_context *, char **); uw_Basis_float uw_Basis_unurlifyFloat(struct uw_context *, char **); uw_Basis_string uw_Basis_unurlifyString(struct uw_context *, char **); +uw_Basis_char uw_Basis_unurlifyChar(struct uw_context *, char **); uw_Basis_string uw_Basis_unurlifyString_fromClient(struct uw_context *, char **); uw_Basis_bool uw_Basis_unurlifyBool(struct uw_context *, char **); uw_Basis_time uw_Basis_unurlifyTime(struct uw_context *, char **); diff --git a/src/c/urweb.c b/src/c/urweb.c index 58f7884d..4d9e8630 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -2267,6 +2267,23 @@ uw_Basis_string uw_Basis_unurlifyString(uw_context ctx, char **s) { return r; } +uw_Basis_char uw_Basis_unurlifyChar(uw_context ctx, char **s) { + char *new_s = uw_unurlify_advance(*s); + char *r; + int len; + + len = strlen(*s); + uw_check_heap(ctx, len + 1); + + r = ctx->heap.front; + ctx->heap.front = uw_unurlifyString_to(0, ctx, ctx->heap.front, *s); + *s = new_s; + if (strlen(r) == 1) + return r[0]; + else + uw_error(ctx, FATAL, "Unurlified character is multiple characters long"); +} + uw_Basis_unit uw_Basis_unurlifyUnit(uw_context ctx, char **s) { (void)ctx; *s = uw_unurlify_advance(*s); diff --git a/src/settings.sml b/src/settings.sml index abb26f72..edc03d4c 100644 --- a/src/settings.sml +++ b/src/settings.sml @@ -117,6 +117,7 @@ fun basis x = S.addList (S.empty, map (fn x : string => ("Basis", x)) x) val clientToServerBase = basis ["int", "float", "string", + "char", "time", "file", "unit", @@ -277,6 +278,7 @@ val jsFuncsBase = basisM [("alert", "alert"), ("urlifyFloat", "ts"), ("urlifyTime", "ts"), ("urlifyString", "uf"), + ("urlifyChar", "uf"), ("urlifyBool", "ub"), ("recv", "rv"), ("strcat", "cat"), -- cgit v1.2.3 From 8728f397bee2b567611dcd7a7c359c7e92159c1c Mon Sep 17 00:00:00 2001 From: Adam Chlipala Date: Wed, 25 Sep 2019 19:54:59 -0400 Subject: Unicode escapes in JSON --- include/urweb/urweb_cpp.h | 1 + lib/ur/basis.urs | 1 + lib/ur/json.ur | 29 +++++++++++++++++++++++++++++ src/c/urweb.c | 12 ++++++++++++ 4 files changed, 43 insertions(+) (limited to 'include/urweb/urweb_cpp.h') diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h index dcf67fef..e4ad6e61 100644 --- a/include/urweb/urweb_cpp.h +++ b/include/urweb/urweb_cpp.h @@ -166,6 +166,7 @@ uw_Basis_string uw_Basis_strchr(struct uw_context *, const char *, uw_Basis_char uw_Basis_int uw_Basis_strcspn(struct uw_context *, const char *, const char *); uw_Basis_string uw_Basis_substring(struct uw_context *, const char *, uw_Basis_int, uw_Basis_int); uw_Basis_string uw_Basis_str1(struct uw_context *, uw_Basis_char); +uw_Basis_string uw_Basis_ofUnicode(struct uw_context *, uw_Basis_int); uw_Basis_string uw_strdup(struct uw_context *, const char *); uw_Basis_string uw_maybe_strdup(struct uw_context *, const char *); diff --git a/lib/ur/basis.urs b/lib/ur/basis.urs index 2a98bf6f..d29bf6e6 100644 --- a/lib/ur/basis.urs +++ b/lib/ur/basis.urs @@ -95,6 +95,7 @@ val strsindex : string -> string -> option int val strcspn : string -> string -> int val substring : string -> int -> int -> string val str1 : char -> string +val ofUnicode : int -> string class show val show : t ::: Type -> show t -> t -> string diff --git a/lib/ur/json.ur b/lib/ur/json.ur index 05406739..70f0c797 100644 --- a/lib/ur/json.ur +++ b/lib/ur/json.ur @@ -59,6 +59,17 @@ fun escape s = "\"" ^ esc s end +fun unhex ch = + if Char.isDigit ch then + Char.toInt ch - Char.toInt #"0" + else if Char.isXdigit ch then + if Char.isUpper ch then + 10 + (Char.toInt ch - Char.toInt #"A") + else + 10 + (Char.toInt ch - Char.toInt #"a") + else + error Invalid hexadecimal digit "{[ch]}" + fun unescape s = let val len = String.length s @@ -75,6 +86,11 @@ fun unescape s = | #"\\" => if i+1 >= len then error JSON unescape: Bad escape sequence: {[s]} + else if String.sub s (i + 1) = #"u" then + if i+5 >= len then + error JSON unescape: Bad escape sequence: {[s]} + else + findEnd (i+6) else findEnd (i+2) | _ => findEnd (i+1) @@ -93,6 +109,19 @@ fun unescape s = #"\\" => if i+1 >= len then error JSON unescape: Bad escape sequence: {[s]} + else if String.sub s (i+1) = #"u" then + if i+5 >= len then + error JSON unescape: Unicode ends early + else + let + val n = + unhex (String.sub s (i+2)) * (256*16) + + unhex (String.sub s (i+3)) * 256 + + unhex (String.sub s (i+4)) * 16 + + unhex (String.sub s (i+5)) + in + ofUnicode n ^ unesc (i+6) + end else (case String.sub s (i+1) of #"n" => "\n" diff --git a/src/c/urweb.c b/src/c/urweb.c index af929269..8c445f39 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -2724,6 +2724,18 @@ uw_Basis_string uw_Basis_str1(uw_context ctx, uw_Basis_char ch) { return r; } +uw_Basis_string uw_Basis_ofUnicode(uw_context ctx, uw_Basis_int n) { + UChar buf16[] = {n}; + uw_Basis_string out = uw_malloc(ctx, 3); + int32_t outLen; + UErrorCode pErrorCode = 0; + + if (u_strToUTF8(out, 3, &outLen, buf16, 1, &pErrorCode) == NULL || outLen == 0) + uw_error(ctx, FATAL, "Bad Unicode string to unescape (error %s)", u_errorName(pErrorCode)); + + return out; +} + uw_Basis_string uw_strdup(uw_context ctx, uw_Basis_string s1) { int len = strlen(s1) + 1; char *s; -- cgit v1.2.3