summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: fab <fabrice.leal.ch@gmail.com> 2018-12-02 00:46:46 +0000
committer: fab <fabrice.leal.ch@gmail.com> 2018-12-02 00:46:46 +0000
commit: b50f472e65c0ffca5d485049325caa51298daa1a (patch)
tree: 7d9acb9558a819764cc17f343916474995541956
parent: 5cc729b48aad084757a049b7e5cdbadae5e9e400 (diff)
1 bug fix and sorting out my own confusion: uw_Basis_char is already a codepoint, NOT the "serialized" utf8
-rw-r--r-- src/c/urweb.c | 37
-rw-r--r-- tests/utf8.ur | 5
2 files changed, 10 insertions, 32 deletions
diff --git a/src/c/urweb.c b/src/c/urweb.c
index 195ddada..a4203376 100644
--- a/src/c/urweb.c
+++ b/src/c/urweb.c
@@ -1560,6 +1560,7 @@ const char *uw_Basis_get_settings(uw_context ctx, uw_unit u) {
}
uw_Basis_bool uw_Basis_isprint(uw_context ctx, uw_Basis_char ch);
+
void jsifyChar(char**buffer_ptr, uw_context ctx, uw_Basis_char c1) {
char* buffer = *buffer_ptr;
@@ -1594,7 +1595,7 @@ void jsifyChar(char**buffer_ptr, uw_context ctx, uw_Basis_char c1) {
}
else {
assert(65536 > c1);
- sprintf(buffer, "\\u%04x", (unsigned char)c1);
+ sprintf(buffer, "\\u%04x", c1);
buffer += 6;
}
}
@@ -4488,43 +4489,17 @@ uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c) {
uw_Basis_bool uw_Basis_iscodepoint (uw_context ctx, uw_Basis_int n) {
(void)ctx;
- uw_Basis_char ch = (uw_Basis_char)n;
-
- if (UCHAR_MIN_VALUE <= ch && UCHAR_MAX_VALUE > ch) {
-
- if (U8_LENGTH(ch) == 0) {
- return uw_Basis_False;
- }
-
- if (u_charType(ch) == U_UNASSIGNED) {
- return uw_Basis_False;
- }
-
- } else {
- return uw_Basis_False;
- }
-
- return uw_Basis_True;
+ return !!(0 <= n && n <= 0x10FFFF); /* code points span 0..0x10FFFF; keep the lower bound the removed code had so negative n is rejected */
}
uw_Basis_char uw_Basis_chr(uw_context ctx, uw_Basis_int n) {
(void)ctx;
uw_Basis_char ch = (uw_Basis_char)n;
- if (UCHAR_MIN_VALUE <= ch && UCHAR_MAX_VALUE > ch) {
-
- if (U8_LENGTH(ch) == 0) {
- uw_error(ctx, FATAL, "The integer %lld cannot be converted to a char", n);
- }
-
- if (u_charType(ch) == U_UNASSIGNED) {
- uw_error(ctx, FATAL, "The integer %lld is not a valid char codepoint", n);
- }
-
- } else {
- uw_error(ctx, FATAL, "Integer %lld out of range of unicode chars", n);
+ if (n < 0 || n > 0x10FFFF) { /* n is signed (%lld): reject negatives as well as values above the Unicode maximum before the truncated cast is returned */
+ uw_error(ctx, FATAL, "The integer %lld is not a valid char codepoint", n);
}
-
+
return ch;
}
diff --git a/tests/utf8.ur b/tests/utf8.ur
index 07ac9c3d..e7c7fd40 100644
--- a/tests/utf8.ur
+++ b/tests/utf8.ur
@@ -100,7 +100,10 @@ fun strcats () : transaction page =
{test_cat_and_len 5 "àà" "áá" "ààáá" 4}
{test_cat_and_len 6 "" "áá" "áá" 2}
{test_cat_and_len 7 "àà" "" "àà" 2}
- {test_cat_and_len 8 "函數" "ãã" "函數ãã" 4}
+ {test_cat_and_len 8 "函數" "ãã" "函數ãã" 4}
+ {test_cat_and_len 9 "ç" "ã" "çã" 2}
+ {test_cat_and_len 10 (show (strsub "ç" 0)) (show (strsub "ã" 0)) "çã" 2}
+ {test_cat_and_len 11 (show (chr 231)) (show (chr 227)) "çã" 2}
</body>
</xml>
end