From b50f472e65c0ffca5d485049325caa51298daa1a Mon Sep 17 00:00:00 2001
From: fab <fabrice.leal.ch@gmail.com>
Date: Sun, 2 Dec 2018 00:46:46 +0000
Subject: 1 bug fix and sorting out my own confusion: uw_Basis_char is already
 a codepoint, NOT the "serialized" utf8

---
 src/c/urweb.c | 37 ++++++-------------------------------
 tests/utf8.ur |  5 ++++-
 2 files changed, 10 insertions(+), 32 deletions(-)
diff --git a/src/c/urweb.c b/src/c/urweb.c
index 195ddada..a4203376 100644
--- a/src/c/urweb.c
+++ b/src/c/urweb.c
@@ -1560,6 +1560,7 @@ const char *uw_Basis_get_settings(uw_context ctx, uw_unit u) {
 }
 
 uw_Basis_bool uw_Basis_isprint(uw_context ctx, uw_Basis_char ch);
+
 void jsifyChar(char**buffer_ptr, uw_context ctx, uw_Basis_char c1) {
   char* buffer = *buffer_ptr;
   
@@ -1594,7 +1595,7 @@ void jsifyChar(char**buffer_ptr, uw_context ctx, uw_Basis_char c1) {
       }
     else {
       assert(65536 > c1);
-      sprintf(buffer, "\\u%04x", (unsigned char)c1);
+      sprintf(buffer, "\\u%04x", c1);
       buffer += 6;
     }
   }
@@ -4488,43 +4489,17 @@ uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c) {
 
 uw_Basis_bool uw_Basis_iscodepoint (uw_context ctx, uw_Basis_int n) {
   (void)ctx;
-  uw_Basis_char ch = (uw_Basis_char)n;
-
-  if (UCHAR_MIN_VALUE <= ch && UCHAR_MAX_VALUE > ch) {
-
-    if (U8_LENGTH(ch) == 0) {
-      return uw_Basis_False;
-    }
-
-    if (u_charType(ch) == U_UNASSIGNED) {
-      return uw_Basis_False;
-    }
-
-  } else {
-    return uw_Basis_False;
-  }
-
-  return uw_Basis_True;
+  return !!(n >= 0 && n <= 0x10FFFF); /* code points span 0..0x10FFFF; negative n is never one */
 }
 
 uw_Basis_char uw_Basis_chr(uw_context ctx, uw_Basis_int n) {
   (void)ctx;
   uw_Basis_char ch = (uw_Basis_char)n;
 
-  if (UCHAR_MIN_VALUE <= ch && UCHAR_MAX_VALUE > ch) {
-
-    if (U8_LENGTH(ch) == 0) {
-      uw_error(ctx, FATAL, "The integer %lld cannot be converted to a char", n);
-    }
-
-    if (u_charType(ch) == U_UNASSIGNED) {
-      uw_error(ctx, FATAL, "The integer %lld is not a valid char codepoint", n);
-    }
-
-  } else {
-    uw_error(ctx, FATAL, "Integer %lld out of range of unicode chars", n);
+  if (n < 0 || n > 0x10FFFF) { /* reject both sides of the code point range 0..0x10FFFF */
+    uw_error(ctx, FATAL, "The integer %lld is not a valid char codepoint", n);
   }
- 
+
   return ch;
 }
 
diff --git a/tests/utf8.ur b/tests/utf8.ur
index 07ac9c3d..e7c7fd40 100644
--- a/tests/utf8.ur
+++ b/tests/utf8.ur
@@ -100,7 +100,10 @@ fun strcats () : transaction page =
 	    {test_cat_and_len 5 "àà" "áá" "ààáá" 4}
 	    {test_cat_and_len 6 "" "áá" "áá" 2}
 	    {test_cat_and_len 7 "àà" "" "àà" 2}
-	    {test_cat_and_len 8 "函數" "ãã" "函數ãã" 4}	     
+	    {test_cat_and_len 8 "函數" "ãã" "函數ãã" 4}
+	    {test_cat_and_len 9 "ç" "ã" "çã" 2}
+	    {test_cat_and_len 10 (show (strsub "ç" 0)) (show (strsub "ã" 0)) "çã" 2}
+	    {test_cat_and_len 11 (show (chr 231)) (show (chr 227)) "çã" 2}
 	  </body>
 	</xml>
 end
-- 
cgit v1.2.3