From b50f472e65c0ffca5d485049325caa51298daa1a Mon Sep 17 00:00:00 2001 From: fab Date: Sun, 2 Dec 2018 00:46:46 +0000 Subject: 1 bug fix and sorting out my own confusion: uw_Basis_char is already a codepoint, NOT the "serialized" utf8 --- src/c/urweb.c | 37 ++++++------------------------------- tests/utf8.ur | 5 ++++- 2 files changed, 10 insertions(+), 32 deletions(-) diff --git a/src/c/urweb.c b/src/c/urweb.c index 195ddada..a4203376 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -1560,6 +1560,7 @@ const char *uw_Basis_get_settings(uw_context ctx, uw_unit u) { } uw_Basis_bool uw_Basis_isprint(uw_context ctx, uw_Basis_char ch); + void jsifyChar(char**buffer_ptr, uw_context ctx, uw_Basis_char c1) { char* buffer = *buffer_ptr; @@ -1594,7 +1595,7 @@ void jsifyChar(char**buffer_ptr, uw_context ctx, uw_Basis_char c1) { } else { assert(65536 > c1); - sprintf(buffer, "\\u%04x", (unsigned char)c1); + sprintf(buffer, "\\u%04x", c1); buffer += 6; } } @@ -4488,43 +4489,17 @@ uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c) { uw_Basis_bool uw_Basis_iscodepoint (uw_context ctx, uw_Basis_int n) { (void)ctx; - uw_Basis_char ch = (uw_Basis_char)n; - - if (UCHAR_MIN_VALUE <= ch && UCHAR_MAX_VALUE > ch) { - - if (U8_LENGTH(ch) == 0) { - return uw_Basis_False; - } - - if (u_charType(ch) == U_UNASSIGNED) { - return uw_Basis_False; - } - - } else { - return uw_Basis_False; - } - - return uw_Basis_True; + return !!(n >= 0 && n <= 0x10FFFF); /* Unicode code points span 0..0x10FFFF; n is signed, so negatives must be rejected too */ } uw_Basis_char uw_Basis_chr(uw_context ctx, uw_Basis_int n) { (void)ctx; uw_Basis_char ch = (uw_Basis_char)n; - if (UCHAR_MIN_VALUE <= ch && UCHAR_MAX_VALUE > ch) { - - if (U8_LENGTH(ch) == 0) { - uw_error(ctx, FATAL, "The integer %lld cannot be converted to a char", n); - } - - if (u_charType(ch) == U_UNASSIGNED) { - uw_error(ctx, FATAL, "The integer %lld is not a valid char codepoint", n); - } - - } else { - uw_error(ctx, FATAL, "Integer %lld out of range of unicode chars", n); + if (n < 0 || n > 0x10FFFF) { /* outside 0..0x10FFFF, including negative values */ + uw_error(ctx, FATAL, "The integer %lld is not a valid char codepoint", n); } - + return ch; } diff --git a/tests/utf8.ur b/tests/utf8.ur index 07ac9c3d..e7c7fd40 100644 --- a/tests/utf8.ur +++ b/tests/utf8.ur @@ -100,7 +100,10 @@ fun strcats () : transaction page = {test_cat_and_len 5 "àà" "áá" "ààáá" 4} {test_cat_and_len 6 "" "áá" "áá" 2} {test_cat_and_len 7 "àà" "" "àà" 2} - {test_cat_and_len 8 "函數" "ãã" "函數ãã" 4} + {test_cat_and_len 8 "函數" "ãã" "函數ãã" 4} + {test_cat_and_len 9 "ç" "ã" "çã" 2} + {test_cat_and_len 10 (show (strsub "ç" 0)) (show (strsub "ã" 0)) "çã" 2} + {test_cat_and_len 11 (show (chr 231)) (show (chr 227)) "çã" 2} end -- cgit v1.2.3