From eb86dffeeec897d17905f3adff84e6acfd018330 Mon Sep 17 00:00:00 2001
From: Denis Redozubov <denis.redozubov@gmail.com>
Date: Wed, 22 Aug 2018 15:11:32 +0300
Subject: Rough same page anchors

---
 include/urweb/urweb_cpp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/urweb/urweb_cpp.h')

diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h
index 5f1144b8..1351cfbc 100644
--- a/include/urweb/urweb_cpp.h
+++ b/include/urweb/urweb_cpp.h
@@ -242,6 +242,7 @@ uw_Basis_string uw_Basis_blessEnvVar(struct uw_context *, uw_Basis_string);
 uw_Basis_string uw_Basis_blessMeta(struct uw_context *, uw_Basis_string);
 
 uw_Basis_string uw_Basis_checkUrl(struct uw_context *, uw_Basis_string);
+uw_Basis_string uw_Basis_anchorUrl(struct uw_context *, uw_Basis_string);
 uw_Basis_string uw_Basis_checkMime(struct uw_context *, uw_Basis_string);
 uw_Basis_string uw_Basis_checkRequestHeader(struct uw_context *, uw_Basis_string);
 uw_Basis_string uw_Basis_checkResponseHeader(struct uw_context *, uw_Basis_string);
-- 
cgit v1.2.3


From 5cc729b48aad084757a049b7e5cdbadae5e9e400 Mon Sep 17 00:00:00 2001
From: fab <fabrice.leal.ch@gmail.com>
Date: Fri, 30 Nov 2018 23:29:14 +0000
Subject: reject invalid codepoints. Basis.iscodepoint. fix german char in js

---
 include/urweb/urweb_cpp.h |   5 +-
 lib/js/urweb.js           |   7 +-
 lib/ur/basis.urs          |   2 +
 src/c/urweb.c             | 265 +++++++++++++++++++++++++++-------------------
 4 files changed, 168 insertions(+), 111 deletions(-)

(limited to 'include/urweb/urweb_cpp.h')

diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h
index 5f1144b8..25f97fb3 100644
--- a/include/urweb/urweb_cpp.h
+++ b/include/urweb/urweb_cpp.h
@@ -103,7 +103,7 @@ char *uw_Basis_htmlifyFloat(struct uw_context *, uw_Basis_float);
 char *uw_Basis_htmlifyString(struct uw_context *, uw_Basis_string);
 char *uw_Basis_htmlifyBool(struct uw_context *, uw_Basis_bool);
 char *uw_Basis_htmlifyTime(struct uw_context *, uw_Basis_time);
-char *uw_Basis_htmlifySpecialChar(struct uw_context *, unsigned char);
+char *uw_Basis_htmlifySpecialChar(struct uw_context *, uw_Basis_char);
 char *uw_Basis_htmlifySource(struct uw_context *, uw_Basis_source);
 
 uw_unit uw_Basis_htmlifyInt_w(struct uw_context *, uw_Basis_int);
@@ -111,7 +111,7 @@ uw_unit uw_Basis_htmlifyFloat_w(struct uw_context *, uw_Basis_float);
 uw_unit uw_Basis_htmlifyString_w(struct uw_context *, uw_Basis_string);
 uw_unit uw_Basis_htmlifyBool_w(struct uw_context *, uw_Basis_bool);
 uw_unit uw_Basis_htmlifyTime_w(struct uw_context *, uw_Basis_time);
-uw_unit uw_Basis_htmlifySpecialChar_w(struct uw_context *, unsigned char);
+uw_unit uw_Basis_htmlifySpecialChar_w(struct uw_context *, uw_Basis_char);
 uw_unit uw_Basis_htmlifySource_w(struct uw_context *, uw_Basis_source);
 
 char *uw_Basis_attrifyInt(struct uw_context *, uw_Basis_int);
@@ -327,6 +327,7 @@ uw_Basis_bool uw_Basis_isxdigit(struct uw_context *, uw_Basis_char);
 uw_Basis_char uw_Basis_tolower(struct uw_context *, uw_Basis_char);
 uw_Basis_char uw_Basis_toupper(struct uw_context *, uw_Basis_char);
 
+uw_Basis_bool uw_Basis_iscodepoint(struct uw_context *, uw_Basis_int);
 uw_Basis_int uw_Basis_ord(struct uw_context *, uw_Basis_char);
 uw_Basis_char uw_Basis_chr(struct uw_context *, uw_Basis_int);
 
diff --git a/lib/js/urweb.js b/lib/js/urweb.js
index de1a2ad0..c7725e28 100644
--- a/lib/js/urweb.js
+++ b/lib/js/urweb.js
@@ -38,7 +38,12 @@ function isXdigit(c) { return isDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A'
 function ord(c) { return c.charCodeAt(0); }
 function isPrint(c) { return ord(c) > 31 && ord(c) != 127; }
 function toLower(c) { return c.toLowerCase(); }
-function toUpper(c) { return c.toUpperCase(); }
+function toUpper(c) { // upper-case a single character, always returning a single character
+    if (ord(c) == 223) // U+00DF (sharp s): toUpperCase() would expand it to the two-character "SS"
+	return c;
+    else
+	return c.toUpperCase();
+}
 
 // Lists
 
diff --git a/lib/ur/basis.urs b/lib/ur/basis.urs
index 878f2793..c9d6556b 100644
--- a/lib/ur/basis.urs
+++ b/lib/ur/basis.urs
@@ -79,6 +79,8 @@ val toupper : char -> char
 val ord : char -> int
 val chr : int -> char
 
+val iscodepoint : int -> bool
+
 (** String operations *)
 
 val strlen : string -> int
diff --git a/src/c/urweb.c b/src/c/urweb.c
index be65afcc..195ddada 100644
--- a/src/c/urweb.c
+++ b/src/c/urweb.c
@@ -1559,101 +1559,89 @@ const char *uw_Basis_get_settings(uw_context ctx, uw_unit u) {
   }
 }
 
-uw_Basis_string uw_Basis_jsifyString(uw_context ctx, uw_Basis_string s) {
-  char *r, *s2;
-
-  uw_check_heap(ctx, strlen(s) * 4 + 3);
-
-  r = s2 = ctx->heap.front;
-  *s2++ = '"';
-
-  for (; *s; s++) {
-    unsigned char c = *s;
-
-    switch (c) {
-    case '"':
-      strcpy(s2, "\\\"");
-      s2 += 2;
-      break;
-    case '\'':
-      strcpy(s2, "\\047");
-      s2 += 4;
-      break;
-    case '\\':
-      strcpy(s2, "\\\\");
-      s2 += 2;
-      break;
-    case '<':
-      strcpy(s2, "\\074");
-      s2 += 4;
-      break;
-    case '&':
-      strcpy(s2, "\\046");
-      s2 += 4;
-      break;
-    default:
-      if (isprint((int)c) || c >= 128)
-        *s2++ = c;
-      else {
-        sprintf(s2, "\\%03o", c);
-        s2 += 4;
-      }
-    }
-  }
-
-  strcpy(s2, "\"");
-  ctx->heap.front = s2 + 2;
-  return r;
-}
-
 uw_Basis_bool uw_Basis_isprint(uw_context ctx, uw_Basis_char ch);
-
-uw_Basis_string uw_Basis_jsifyChar(uw_context ctx, uw_Basis_char c1) {
-  char *r, *s2;
-
-  uw_check_heap(ctx, 7);
-
-  r = s2 = ctx->heap.front;
-  *s2++ = '"';
-
+/* Append the JavaScript string-literal escape of c1 at *buffer_ptr and advance *buffer_ptr past it. */
+void jsifyChar(char**buffer_ptr, uw_context ctx, uw_Basis_char c1) {
+  char* buffer = *buffer_ptr;
   switch (c1) {
   case '"':
-    strcpy(s2, "\\\"");
-    s2 += 2;
+    strcpy(buffer, "\\\"");
+    buffer += 2;
     break;
   case '\'':
-    strcpy(s2, "\\047");
-    s2 += 4;
+    strcpy(buffer, "\\047");
+    buffer += 4;
     break;
   case '\\':
-    strcpy(s2, "\\\\");
-    s2 += 2;
+    strcpy(buffer, "\\\\");
+    buffer += 2;
     break;
   case '<':
-    strcpy(s2, "\\074");
-    s2 += 4;
+    strcpy(buffer, "\\074");
+    buffer += 4;
     break;
   case '&':
-    strcpy(s2, "\\046");
-    s2 += 4;
+    strcpy(buffer, "\\046");
+    buffer += 4;
     break;
   default:
     
     if (uw_Basis_isprint(ctx, c1) == uw_Basis_True)
       {
 	int offset = 0;
-	U8_APPEND_UNSAFE(s2, offset, c1);
-	s2 += offset;
+	U8_APPEND_UNSAFE(buffer, offset, c1);
+	buffer += offset;
       }
     else {
-      assert(0777 >= c1);
-      sprintf(s2, "\\%03o", (unsigned char)c1);
-      s2 += 4;
+      assert(65536 > c1);
+      /* Emit all 16 bits (an unsigned char cast dropped the high byte); the mask keeps the escape at four hex digits, matching the 6-byte advance. */
+      sprintf(buffer, "\\u%04x", (unsigned int)c1 & 0xFFFFu);
+      buffer += 6;
     }
   }
 
+  *buffer_ptr = buffer;
+}
+
+/* Build, at the front of the context heap, a double-quoted JavaScript string literal for s. */
+uw_Basis_string uw_Basis_jsifyString(uw_context ctx, uw_Basis_string s) {
+  char *r, *s2;
+  uw_Basis_char c;
+  /* Budget: jsifyChar emits at most 6 bytes per code point, and each code point
+   * consumes at least one input byte; +3 covers both quotes and the NUL. */
+  uw_check_heap(ctx, strlen(s) * 6 + 3);
+  r = s2 = ctx->heap.front;
+  *s2++ = '"';
+
+  int offset = 0;
+  while(s[offset] != 0)
+    {
+      U8_NEXT(s, offset, -1, c);  /* decode one code point; offset moves past it */
+      jsifyChar(&s2, ctx, c);      
+    }
+
+  strcpy(s2, "\"");
+  ctx->heap.front = s2 + 2;  /* step past the closing quote and its NUL */
+
+  return r;
+}
+
+uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c);
+
+uw_Basis_string uw_Basis_jsifyChar(uw_context ctx, uw_Basis_char c1) {
+  char *r, *s2;
+
+  /* Worst case is 9 bytes: opening quote, six for a \uXXXX escape,
+   * closing quote, and the NUL that heap.front is advanced past. */
+  uw_check_heap(ctx, 9);
+  r = s2 = ctx->heap.front;
+  *s2++ = '"';
+
+  jsifyChar(&s2, ctx, c1);
+
+  strcpy(s2, "\"");
+  ctx->heap.front = s2 + 2;
+
   return r;
 }
 
@@ -1697,6 +1685,7 @@ uw_Basis_string uw_Basis_jsifyString_ws(uw_context ctx, uw_Basis_string s) {
 
   strcpy(s2, "\"");
   ctx->script.front = s2 + 1;
+
   return r;
 }
 
@@ -2262,25 +2251,27 @@ uw_unit uw_Basis_htmlifyInt_w(uw_context ctx, uw_Basis_int n) {
   return uw_unit_v;
 }
 
-char *uw_Basis_htmlifySpecialChar(uw_context ctx, unsigned char ch) {
+char *uw_Basis_htmlifySpecialChar(uw_context ctx, uw_Basis_char ch) {
   unsigned int n = ch;
   int len;
   char *r;
 
-  uw_check_heap(ctx, INTS_MAX+3);
+  uw_check_heap(ctx, INTS_MAX+3 + 1);
   r = ctx->heap.front;
-  sprintf(r, "&#%u;%n", n, &len);
+  len = sprintf(r, "&#%u;", n);
   ctx->heap.front += len+1;
+
   return r;
 }
 
-uw_unit uw_Basis_htmlifySpecialChar_w(uw_context ctx, unsigned char ch) {
+uw_unit uw_Basis_htmlifySpecialChar_w(uw_context ctx, uw_Basis_char ch) {
   unsigned int n = ch;
   int len;
 
   uw_check(ctx, INTS_MAX+3);
-  sprintf(ctx->page.front, "&#%u;%n", n, &len);
+  len = sprintf(ctx->page.front, "&#%u;", n);
   ctx->page.front += len;
+
   return uw_unit_v;
 }
 
@@ -2328,48 +2319,69 @@ uw_unit uw_Basis_jsifyInt_w(uw_context ctx, uw_Basis_int n) {
 
 char *uw_Basis_htmlifyString(uw_context ctx, const char *s) {
   char *r, *s2;
+  uw_Basis_char c1;
+  int offset = 0, len = 0;
+  
+  uw_check_heap(ctx, strlen(s) * (INTS_MAX + 3) + 1);
 
-  uw_check_heap(ctx, strlen(s) * 5 + 1);
-
-  for (r = s2 = ctx->heap.front; *s; s++) {
-    unsigned char c = *s;
-
-    switch (c) {
-    case '<':
-      strcpy(s2, "&lt;");
-      s2 += 4;
-      break;
-    case '&':
-      strcpy(s2, "&amp;");
-      s2 += 5;
-      break;
-    default:
-      *s2++ = c;
+  r = s2 = ctx->heap.front;
+  
+  while (s[offset] != 0) {
+    
+    U8_NEXT(s, offset, -1, c1);
+     
+    
+    if (U8_IS_SINGLE(c1) && uw_Basis_isprint(ctx, c1)) {
+      switch (c1) {
+      case '<':
+	strcpy(s2, "&lt;");
+	s2 += 4;
+	break;
+      case '&':
+	strcpy(s2, "&amp;");
+	s2 += 5;
+	break;
+      default:
+	*s2++ = c1;	
+      }      
+    } else {
+      len = sprintf(s2, "&#%u;", c1);
+      s2 += len;
     }
   }
-
+  
   *s2++ = 0;
   ctx->heap.front = s2;
+
   return r;
 }
 
 uw_unit uw_Basis_htmlifyString_w(uw_context ctx, uw_Basis_string s) {
   uw_check(ctx, strlen(s) * 6);
-
-  for (; *s; s++) {
-    unsigned char c = *s;
-
-    switch (c) {
-    case '<':
-      uw_write_unsafe(ctx, "&lt;");
-      break;
-    case '&':
-      uw_write_unsafe(ctx, "&amp;");
-      break;
-    default:
-      uw_writec_unsafe(ctx, c);
+  int offset = 0;
+  uw_Basis_char c1;
+  
+  while(s[offset] != 0){
+
+    U8_NEXT(s, offset, -1, c1);
+ 
+    if (U8_IS_SINGLE(c1) && uw_Basis_isprint(ctx, c1)) {
+	
+      switch (c1) {
+      case '<':
+	uw_write_unsafe(ctx, "&lt;");
+	break;
+      case '&':
+	uw_write_unsafe(ctx, "&amp;");
+	break;
+      default:
+	uw_writec_unsafe(ctx, c1);
+      }
     }
-  }
+    else {
+      uw_Basis_htmlifySpecialChar_w(ctx, c1);
+    }    
+  }  
 
   return uw_unit_v;
 }
@@ -4474,9 +4486,46 @@ uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c) {
   return (uw_Basis_int)c;
 }
 
+/* Report whether n is an assigned Unicode code point. */
+uw_Basis_bool uw_Basis_iscodepoint (uw_context ctx, uw_Basis_int n) {
+  (void)ctx;
+
+  /* Test the range on n itself: narrowing to uw_Basis_char first could
+   * wrap an out-of-range integer into the valid range.  UCHAR_MAX_VALUE
+   * itself is rejected by this bound. */
+  if (n < UCHAR_MIN_VALUE || n >= UCHAR_MAX_VALUE) {
+    return uw_Basis_False;
+  }
+
+  uw_Basis_char ch = (uw_Basis_char)n;
+
+  /* U8_LENGTH yields 0 for values that have no UTF-8 encoding. */
+  if (U8_LENGTH(ch) == 0 || u_charType(ch) == U_UNASSIGNED) {
+    return uw_Basis_False;
+  }
+
+  return uw_Basis_True;
+}
+
 uw_Basis_char uw_Basis_chr(uw_context ctx, uw_Basis_int n) {
   (void)ctx;
-  return (uw_Basis_char)n;
+  /* Convert n to a character, raising a FATAL error unless it is an assigned
+   * code point.  The range is checked on n itself, before narrowing to
+   * uw_Basis_char, so an out-of-range integer cannot wrap into a valid one. */
+  if (n < UCHAR_MIN_VALUE || n >= UCHAR_MAX_VALUE) {
+    uw_error(ctx, FATAL, "Integer %lld out of range of unicode chars", n);
+  }
+
+  uw_Basis_char ch = (uw_Basis_char)n;
+  /* U8_LENGTH yields 0 for values that have no UTF-8 encoding. */
+  if (U8_LENGTH(ch) == 0) {
+    uw_error(ctx, FATAL, "The integer %lld cannot be converted to a char", n);
+  }
+  if (u_charType(ch) == U_UNASSIGNED) {
+    uw_error(ctx, FATAL, "The integer %lld is not a valid char codepoint", n);
+  }
+
+  return ch;
 }
 
 uw_Basis_string uw_Basis_currentUrl(uw_context ctx) {
-- 
cgit v1.2.3


From 28d130c8c3c2ef9cd229d09afe14fbcbcb954223 Mon Sep 17 00:00:00 2001
From: fab <fabrice.leal.ch@gmail.com>
Date: Wed, 9 Jan 2019 22:34:53 +0000
Subject: urlifyChar needs to be added to .h file as well

---
 include/urweb/urweb_cpp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/urweb/urweb_cpp.h')

diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h
index 25f97fb3..25f26e1b 100644
--- a/include/urweb/urweb_cpp.h
+++ b/include/urweb/urweb_cpp.h
@@ -138,6 +138,7 @@ char *uw_Basis_urlifySource(struct uw_context *, uw_Basis_source);
 
 uw_unit uw_Basis_urlifyInt_w(struct uw_context *, uw_Basis_int);
 uw_unit uw_Basis_urlifyFloat_w(struct uw_context *, uw_Basis_float);
+uw_unit uw_Basis_urlifyChar_w(struct uw_context *, uw_Basis_char);
 uw_unit uw_Basis_urlifyString_w(struct uw_context *, uw_Basis_string);
 uw_unit uw_Basis_urlifyBool_w(struct uw_context *, uw_Basis_bool);
 uw_unit uw_Basis_urlifyTime_w(struct uw_context *, uw_Basis_time);
-- 
cgit v1.2.3


From 87d2eab53f8e9f81cc459429675123c9ff36f41e Mon Sep 17 00:00:00 2001
From: Adam Chlipala <adam@chlipala.net>
Date: Mon, 21 Jan 2019 18:09:59 -0500
Subject: Basis.textOfBlob; try creating filecache directory if it doesn't
 exist

---
 include/urweb/urweb_cpp.h |  1 +
 lib/ur/basis.urs          |  2 ++
 src/c/urweb.c             | 16 +++++++++++++++-
 src/cjr_print.sml         | 24 +++++++++++++++++++++++-
 4 files changed, 41 insertions(+), 2 deletions(-)

(limited to 'include/urweb/urweb_cpp.h')

diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h
index 25f97fb3..67312015 100644
--- a/include/urweb/urweb_cpp.h
+++ b/include/urweb/urweb_cpp.h
@@ -262,6 +262,7 @@ uw_Basis_string uw_Basis_fileMimeType(struct uw_context *, uw_Basis_file);
 uw_Basis_blob uw_Basis_fileData(struct uw_context *, uw_Basis_file);
 uw_Basis_int uw_Basis_blobSize(struct uw_context *, uw_Basis_blob);
 uw_Basis_blob uw_Basis_textBlob(struct uw_context *, uw_Basis_string);
+uw_Basis_string uw_Basis_textOfBlob(struct uw_context *, uw_Basis_blob);
 
 uw_Basis_string uw_Basis_postType(struct uw_context *, uw_Basis_postBody);
 uw_Basis_string uw_Basis_postData(struct uw_context *, uw_Basis_postBody);
diff --git a/lib/ur/basis.urs b/lib/ur/basis.urs
index c893e65d..be13c684 100644
--- a/lib/ur/basis.urs
+++ b/lib/ur/basis.urs
@@ -1019,6 +1019,8 @@ val checkMime : string -> option mimeType
 val returnBlob : t ::: Type -> blob -> mimeType -> transaction t
 val blobSize : blob -> int
 val textBlob : string -> blob
+val textOfBlob : blob -> option string
+(* Returns [Some] exactly when the blob contains no zero bytes. *)
 
 type postBody
 val postType : postBody -> string
diff --git a/src/c/urweb.c b/src/c/urweb.c
index ae2fc0a8..c8cfb0c6 100644
--- a/src/c/urweb.c
+++ b/src/c/urweb.c
@@ -4075,6 +4075,20 @@ uw_Basis_blob uw_Basis_textBlob(uw_context ctx, uw_Basis_string s) {
   return b;
 }
 
+/* Copy blob b into a NUL-terminated string obtained from uw_malloc; returns NULL when b contains a zero byte. */
+uw_Basis_string uw_Basis_textOfBlob(uw_context ctx, uw_Basis_blob b) {
+  size_t i;
+  uw_Basis_string r;
+  /* Scan first: an embedded zero byte would cut the resulting C string short. */
+  for (i = 0; i < b.size; ++i)
+    if (b.data[i] == 0)
+      return NULL;
+  r = uw_malloc(ctx, b.size + 1);
+  memcpy(r, b.data, b.size);
+  r[b.size] = 0;
+  return r;
+}
+
 uw_Basis_blob uw_Basis_fileData(uw_context ctx, uw_Basis_file f) {
   (void)ctx;
   return f.data;
@@ -5207,7 +5221,7 @@ uw_unit uw_Basis_cache_file(uw_context ctx, uw_Basis_blob contents) {
 
   fd = mkstemp(tempfile);
   if (fd < 0)
-    uw_error(ctx, FATAL, "Error creating temporary file for cache");
+    uw_error(ctx, FATAL, "Error creating temporary file %s for cache", tempfile);
 
   while (written_so_far < contents.size) {
     ssize_t written_just_now = write(fd, contents.data + written_so_far, contents.size - written_so_far);
diff --git a/src/cjr_print.sml b/src/cjr_print.sml
index 31653a74..09cd9c7f 100644
--- a/src/cjr_print.sml
+++ b/src/cjr_print.sml
@@ -3391,6 +3391,14 @@ fun p_file env (ds, ps) =
              newline,
              string "#include <time.h>",
              newline,
+             (case Settings.getFileCache () of
+                  NONE => box []
+                | SOME _ => box [string "#include <sys/types.h>",
+                                 newline,
+                                 string "#include <sys/stat.h>",
+                                 newline,
+                                 string "#include <unistd.h>",
+                                 newline]),
              if hasDb then
                  box [string ("#include <" ^ #header (Settings.currentDbms ()) ^ ">"),
                       newline]
@@ -3655,7 +3663,21 @@ fun p_file env (ds, ps) =
              newline,
              string "static void uw_initializer(uw_context ctx) {",
              newline,
-             box [string "uw_begin_initializing(ctx);",
+             box [(case Settings.getFileCache () of
+                       NONE => box []
+                     | SOME dir => box [newline,
+                                        string "struct stat st = {0};",
+                                        newline,
+                                        newline,
+                                        string "if (stat(\"",
+                                        string (Prim.toCString dir),
+                                        string "\", &st) == -1)",
+                                        newline,
+                                        box [string "mkdir(\"",
+                                             string (Prim.toCString dir),
+                                             string "\", 0700);",
+                                             newline]]),
+                  string "uw_begin_initializing(ctx);",
                   newline,
                   p_list_sep newline (fn x => x) (rev (!global_initializers)),
                   string "uw_end_initializing(ctx);",
-- 
cgit v1.2.3


From 3f119f5c0a5f210ed442841dfed3ae98786004e9 Mon Sep 17 00:00:00 2001
From: Adam Chlipala <adam@chlipala.net>
Date: Sat, 23 Mar 2019 20:16:15 -0400
Subject: Supporting 'char' arguments to handlers called from client code

---
 include/urweb/urweb_cpp.h |  1 +
 src/c/urweb.c             | 17 +++++++++++++++++
 src/settings.sml          |  2 ++
 3 files changed, 20 insertions(+)

(limited to 'include/urweb/urweb_cpp.h')

diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h
index 18b5f583..dcf67fef 100644
--- a/include/urweb/urweb_cpp.h
+++ b/include/urweb/urweb_cpp.h
@@ -149,6 +149,7 @@ uw_Basis_unit uw_Basis_unurlifyUnit(struct uw_context * ctx, char **s);
 uw_Basis_int uw_Basis_unurlifyInt(struct uw_context *, char **);
 uw_Basis_float uw_Basis_unurlifyFloat(struct uw_context *, char **);
 uw_Basis_string uw_Basis_unurlifyString(struct uw_context *, char **);
+uw_Basis_char uw_Basis_unurlifyChar(struct uw_context *, char **);
 uw_Basis_string uw_Basis_unurlifyString_fromClient(struct uw_context *, char **);
 uw_Basis_bool uw_Basis_unurlifyBool(struct uw_context *, char **);
 uw_Basis_time uw_Basis_unurlifyTime(struct uw_context *, char **);
diff --git a/src/c/urweb.c b/src/c/urweb.c
index 58f7884d..4d9e8630 100644
--- a/src/c/urweb.c
+++ b/src/c/urweb.c
@@ -2267,6 +2267,23 @@ uw_Basis_string uw_Basis_unurlifyString(uw_context ctx, char **s) {
   return r;
 }
 
+/* Unurlify the value at *s onto the context heap and return it as a character; FATAL error unless the result is exactly one byte long. */
+uw_Basis_char uw_Basis_unurlifyChar(uw_context ctx, char **s) {
+  char *new_s = uw_unurlify_advance(*s);
+  char *r;
+  int len;
+  len = strlen(*s);
+  uw_check_heap(ctx, len + 1);
+  r = ctx->heap.front;
+  ctx->heap.front = uw_unurlifyString_to(0, ctx, ctx->heap.front, *s);
+  *s = new_s;
+  /* NOTE(review): strlen counts bytes, so a character whose unurlified form spans several UTF-8 bytes is rejected here; confirm that is intended. */
+  if (strlen(r) == 1)
+    return r[0];
+  else
+    uw_error(ctx, FATAL, "Unurlified character is multiple characters long");
+}
+
 uw_Basis_unit uw_Basis_unurlifyUnit(uw_context ctx, char **s) {
   (void)ctx;
   *s = uw_unurlify_advance(*s);
diff --git a/src/settings.sml b/src/settings.sml
index abb26f72..edc03d4c 100644
--- a/src/settings.sml
+++ b/src/settings.sml
@@ -117,6 +117,7 @@ fun basis x = S.addList (S.empty, map (fn x : string => ("Basis", x)) x)
 val clientToServerBase = basis ["int",
                                 "float",
                                 "string",
+                                "char",
                                 "time",
                                 "file",
                                 "unit",
@@ -277,6 +278,7 @@ val jsFuncsBase = basisM [("alert", "alert"),
                           ("urlifyFloat", "ts"),
                           ("urlifyTime", "ts"),
                           ("urlifyString", "uf"),
+                          ("urlifyChar", "uf"),
                           ("urlifyBool", "ub"),
                           ("recv", "rv"),
                           ("strcat", "cat"),
-- 
cgit v1.2.3


From 8728f397bee2b567611dcd7a7c359c7e92159c1c Mon Sep 17 00:00:00 2001
From: Adam Chlipala <adam@chlipala.net>
Date: Wed, 25 Sep 2019 19:54:59 -0400
Subject: Unicode escapes in JSON

---
 include/urweb/urweb_cpp.h |  1 +
 lib/ur/basis.urs          |  1 +
 lib/ur/json.ur            | 29 +++++++++++++++++++++++++++++
 src/c/urweb.c             | 12 ++++++++++++
 4 files changed, 43 insertions(+)

(limited to 'include/urweb/urweb_cpp.h')

diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h
index dcf67fef..e4ad6e61 100644
--- a/include/urweb/urweb_cpp.h
+++ b/include/urweb/urweb_cpp.h
@@ -166,6 +166,7 @@ uw_Basis_string uw_Basis_strchr(struct uw_context *, const char *, uw_Basis_char
 uw_Basis_int uw_Basis_strcspn(struct uw_context *, const char *, const char *);
 uw_Basis_string uw_Basis_substring(struct uw_context *, const char *, uw_Basis_int, uw_Basis_int);
 uw_Basis_string uw_Basis_str1(struct uw_context *, uw_Basis_char);
+uw_Basis_string uw_Basis_ofUnicode(struct uw_context *, uw_Basis_int);
 
 uw_Basis_string uw_strdup(struct uw_context *, const char *);
 uw_Basis_string uw_maybe_strdup(struct uw_context *, const char *);
diff --git a/lib/ur/basis.urs b/lib/ur/basis.urs
index 2a98bf6f..d29bf6e6 100644
--- a/lib/ur/basis.urs
+++ b/lib/ur/basis.urs
@@ -95,6 +95,7 @@ val strsindex : string -> string -> option int
 val strcspn : string -> string -> int
 val substring : string -> int -> int -> string
 val str1 : char -> string
+val ofUnicode : int -> string
 
 class show
 val show : t ::: Type -> show t -> t -> string
diff --git a/lib/ur/json.ur b/lib/ur/json.ur
index 05406739..70f0c797 100644
--- a/lib/ur/json.ur
+++ b/lib/ur/json.ur
@@ -59,6 +59,17 @@ fun escape s =
         "\"" ^ esc s
     end
 
+fun unhex ch =
+    if Char.isDigit ch then
+        Char.toInt ch - Char.toInt #"0"
+    else if Char.isXdigit ch then
+        if Char.isUpper ch then
+            10 + (Char.toInt ch - Char.toInt #"A")
+        else
+            10 + (Char.toInt ch - Char.toInt #"a")
+    else
+        error <xml>Invalid hexadecimal digit "{[ch]}"</xml>
+    
 fun unescape s =
     let
         val len = String.length s
@@ -75,6 +86,11 @@ fun unescape s =
                       | #"\\" =>
                         if i+1 >= len then
                             error <xml>JSON unescape: Bad escape sequence: {[s]}</xml>
+                        else if String.sub s (i + 1) = #"u" then
+                            if i+5 >= len then
+                                error <xml>JSON unescape: Bad escape sequence: {[s]}</xml>
+                            else
+                                findEnd (i+6)
                         else
                             findEnd (i+2)
                       | _ => findEnd (i+1)
@@ -93,6 +109,19 @@ fun unescape s =
                         #"\\" =>
                         if i+1 >= len then
                             error <xml>JSON unescape: Bad escape sequence: {[s]}</xml>
+                        else if String.sub s (i+1) = #"u" then
+                            if i+5 >= len then
+                                error <xml>JSON unescape: Unicode ends early</xml>
+                            else
+                                let
+                                    val n =
+                                        unhex (String.sub s (i+2)) * (256*16)
+                                        + unhex (String.sub s (i+3)) * 256
+                                        + unhex (String.sub s (i+4)) * 16
+                                        + unhex (String.sub s (i+5))
+                                in
+                                    ofUnicode n ^ unesc (i+6)
+                                end
                         else
 			    (case String.sub s (i+1) of
 				 #"n" => "\n"
diff --git a/src/c/urweb.c b/src/c/urweb.c
index af929269..8c445f39 100644
--- a/src/c/urweb.c
+++ b/src/c/urweb.c
@@ -2724,6 +2724,18 @@ uw_Basis_string uw_Basis_str1(uw_context ctx, uw_Basis_char ch) {
   return r; 
 }
 
+/* Return the UTF-8 encoding of the single UTF-16 code unit n as a NUL-terminated string; FATAL error if it cannot be converted. */
+uw_Basis_string uw_Basis_ofUnicode(uw_context ctx, uw_Basis_int n) {
+  UChar buf16[] = {n};
+  /* One UTF-16 unit becomes at most 3 UTF-8 bytes; the 4th byte leaves room for the NUL, which a capacity of 3 did not. */
+  uw_Basis_string out = uw_malloc(ctx, 4);
+  int32_t outLen;
+  UErrorCode pErrorCode = U_ZERO_ERROR;
+  if (u_strToUTF8(out, 4, &outLen, buf16, 1, &pErrorCode) == NULL || U_FAILURE(pErrorCode) || outLen == 0)
+    uw_error(ctx, FATAL, "Bad Unicode string to unescape (error %s)", u_errorName(pErrorCode));
+  return out;
+}
+
 uw_Basis_string uw_strdup(uw_context ctx, uw_Basis_string s1) {
   int len = strlen(s1) + 1;
   char *s;
-- 
cgit v1.2.3