From eb86dffeeec897d17905f3adff84e6acfd018330 Mon Sep 17 00:00:00 2001 From: Denis Redozubov Date: Wed, 22 Aug 2018 15:11:32 +0300 Subject: Rough same page anchors --- include/urweb/urweb_cpp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/urweb') diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h index 5f1144b8..1351cfbc 100644 --- a/include/urweb/urweb_cpp.h +++ b/include/urweb/urweb_cpp.h @@ -242,6 +242,7 @@ uw_Basis_string uw_Basis_blessEnvVar(struct uw_context *, uw_Basis_string); uw_Basis_string uw_Basis_blessMeta(struct uw_context *, uw_Basis_string); uw_Basis_string uw_Basis_checkUrl(struct uw_context *, uw_Basis_string); +uw_Basis_string uw_Basis_anchorUrl(struct uw_context *, uw_Basis_string); uw_Basis_string uw_Basis_checkMime(struct uw_context *, uw_Basis_string); uw_Basis_string uw_Basis_checkRequestHeader(struct uw_context *, uw_Basis_string); uw_Basis_string uw_Basis_checkResponseHeader(struct uw_context *, uw_Basis_string); -- cgit v1.2.3 From c2a217f9121dd865122bc6150c53e77bd662050d Mon Sep 17 00:00:00 2001 From: fab Date: Sat, 3 Nov 2018 20:09:20 +0000 Subject: utf-8 aware functions for basis. unit-testing. 
--- .travis.yml | 2 +- include/urweb/types_cpp.h | 3 +- src/c/Makefile.am | 2 +- src/c/urweb.c | 197 +++++++++++++------- src/compiler.sml | 4 +- tests/Makefile | 2 + tests/utf8.py | 449 ++++++++++++++++++++++++++++++++++++++++++++++ tests/utf8.ur | 431 ++++++++++++++++++++++++++++++++++++++++++++ tests/utf8.urp | 5 + 9 files changed, 1024 insertions(+), 71 deletions(-) create mode 100644 tests/utf8.py create mode 100644 tests/utf8.ur create mode 100644 tests/utf8.urp (limited to 'include/urweb') diff --git a/.travis.yml b/.travis.yml index df4e4abc..86d731cc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,7 +18,7 @@ compiler: before_install: - export CONFIGURE_ARGS="" - if command -v apt-get &>/dev/null; then sudo apt-get update -qq; fi - - if command -v apt-get &>/dev/null; then sudo apt-get install -y mlton; fi + - if command -v apt-get &>/dev/null; then sudo apt-get install -y mlton lib-icudev; fi - if command -v brew &>/dev/null; then brew update; fi - if command -v brew &>/dev/null; then brew uninstall libtool; fi - if command -v brew &>/dev/null; then brew install libtool; fi diff --git a/include/urweb/types_cpp.h b/include/urweb/types_cpp.h index 0c546d1c..c6c0dd3e 100644 --- a/include/urweb/types_cpp.h +++ b/include/urweb/types_cpp.h @@ -4,11 +4,12 @@ #include #include #include +#include typedef long long uw_Basis_int; typedef double uw_Basis_float; typedef char* uw_Basis_string; -typedef char uw_Basis_char; +typedef UChar32 uw_Basis_char; typedef struct { time_t seconds; unsigned microseconds; diff --git a/src/c/Makefile.am b/src/c/Makefile.am index 027b1458..96c1d92f 100644 --- a/src/c/Makefile.am +++ b/src/c/Makefile.am @@ -11,7 +11,7 @@ AM_CFLAGS = -Wall -Wunused-parameter -Werror -Wno-format-security -Wno-deprecate liburweb_la_LDFLAGS = $(AM_LDFLAGS) $(OPENSSL_LDFLAGS) \ -export-symbols-regex '^(client_pruner|pthread_create_big|strcmp_nullsafe|uw_.*)' \ -version-info 1:0:0 -liburweb_la_LIBADD = $(PTHREAD_LIBS) -lm $(OPENSSL_LIBS) +liburweb_la_LIBADD 
= $(PTHREAD_LIBS) -lm $(OPENSSL_LIBS) -licui18n -licuuc -licudata liburweb_http_la_LIBADD = liburweb.la liburweb_http_la_LDFLAGS = -export-symbols-regex '^(main|uw_.*)' \ -version-info 1:0:0 diff --git a/src/c/urweb.c b/src/c/urweb.c index 2e3e18bc..69c3da94 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -20,6 +20,9 @@ #include +#include +#include + #include "types.h" #include "uthash.h" @@ -2421,28 +2424,34 @@ uw_unit uw_Basis_htmlifySource_w(uw_context ctx, uw_Basis_source src) { return uw_unit_v; } -uw_Basis_char uw_Basis_strsub(uw_context ctx, uw_Basis_string s, uw_Basis_int n) { +uw_Basis_char uw_Basis_strsub(uw_context ctx, uw_Basis_string s, uw_Basis_int n) { + uw_Basis_char c; + int offset = 0; + while (n >= 0) { - if (*s == 0) + + if (s[offset] == 0) uw_error(ctx, FATAL, "Out-of-bounds strsub"); + U8_NEXT(s, offset, -1, c); + if (n == 0) - return *s; + return c; --n; - ++s; } uw_error(ctx, FATAL, "Negative strsub bound"); } uw_Basis_string uw_Basis_strsuffix(uw_context ctx, uw_Basis_string s, uw_Basis_int n) { + int offset = 0; while (n >= 0) { - if (*s == 0 || n == 0) - return s; + if (s[offset] == 0 || n == 0) + return s + offset; + U8_FWD_1(s, offset, -1); --n; - ++s; } uw_error(ctx, FATAL, "Negative strsuffix bound"); @@ -2450,40 +2459,80 @@ uw_Basis_string uw_Basis_strsuffix(uw_context ctx, uw_Basis_string s, uw_Basis_i uw_Basis_int uw_Basis_strlen(uw_context ctx, uw_Basis_string s) { (void)ctx; - return strlen(s); + int offset = 0, iterations = 0; + while (s[offset] != 0) { + U8_FWD_1(s, offset, -1); + ++iterations; + } + return iterations; } uw_Basis_bool uw_Basis_strlenGe(uw_context ctx, uw_Basis_string s, uw_Basis_int n) { (void)ctx; - + int offset = 0; while (n > 0) { - if (*s == 0) + if (s[offset] == 0) return uw_Basis_False; - + + U8_FWD_1(s, offset, -1); --n; - ++s; } return uw_Basis_True; } +int aux_strchr(uw_Basis_string s, uw_Basis_char ch, int* o_offset) { + int u8idx = 0, offset = 0; + uw_Basis_char c; + + while (s[offset] != 0) { + 
U8_NEXT(s, offset, -1, c); + if (c == ch) { + *o_offset = offset; + return u8idx; + } + + ++u8idx; + } + + *o_offset = -1; + return -1; +} + uw_Basis_string uw_Basis_strchr(uw_context ctx, uw_Basis_string s, uw_Basis_char ch) { (void)ctx; - return strchr(s, ch); + int offset = -1; + if (aux_strchr(s, ch, &offset) > -1) { + return s + offset; + } + return NULL; } uw_Basis_int uw_Basis_strcspn(uw_context ctx, uw_Basis_string s, uw_Basis_string chs) { (void)ctx; - return strcspn(s, chs); + int offset = 0, u8idx = 0, offsetChs = 0; + uw_Basis_char c; + + while (s[offset] != 0) { + U8_NEXT(s, offset, -1, c); + if (aux_strchr(chs, c, &offsetChs) > -1) { + return u8idx; + } + ++u8idx; + } + + return u8idx; } uw_Basis_int *uw_Basis_strindex(uw_context ctx, uw_Basis_string s, uw_Basis_char ch) { - uw_Basis_string r = strchr(s, ch); - if (r == NULL) + (void)ctx; + int offset = -1; + int r = aux_strchr(s, ch, &offset); + if (r == -1) return NULL; else { uw_Basis_int *nr = uw_malloc(ctx, sizeof(uw_Basis_int)); - *nr = r - s; + *nr = r; return nr; } } @@ -2494,13 +2543,19 @@ uw_Basis_int *uw_Basis_strsindex(uw_context ctx, const char *haystack, const cha return NULL; else { uw_Basis_int *nr = uw_malloc(ctx, sizeof(uw_Basis_int)); - *nr = r - haystack; + int src = r - haystack, offset = 0, utf8idx = 0; + while (offset < src) { + U8_FWD_1(haystack, offset, -1); + ++utf8idx; + } + + *nr = utf8idx; return nr; } } uw_Basis_string uw_Basis_strcat(uw_context ctx, uw_Basis_string s1, uw_Basis_string s2) { - int len = uw_Basis_strlen(ctx, s1) + uw_Basis_strlen(ctx, s2) + 1; + int len = strlen(s1) + strlen(s2) + 1; char *s; uw_check_heap(ctx, len); @@ -2515,8 +2570,8 @@ uw_Basis_string uw_Basis_strcat(uw_context ctx, uw_Basis_string s1, uw_Basis_str } uw_Basis_string uw_Basis_substring(uw_context ctx, uw_Basis_string s, uw_Basis_int start, uw_Basis_int len) { - size_t full_len = uw_Basis_strlen(ctx, s); - + int full_len = uw_Basis_strlen(ctx, s); + if (start < 0) uw_error(ctx, FATAL, 
"substring: Negative start index"); if (len < 0) @@ -2524,32 +2579,41 @@ uw_Basis_string uw_Basis_substring(uw_context ctx, uw_Basis_string s, uw_Basis_i if (start + len > full_len) uw_error(ctx, FATAL, "substring: Start index plus length is too large"); - if (start + len == full_len) - return &s[start]; - else { - uw_Basis_string r = uw_malloc(ctx, len+1); - memcpy(r, s+start, len); - r[len] = 0; + int offset = 0; + U8_FWD_N(s, offset, -1, start); + + if (start + len == full_len) { + return s + offset; + } else { + int end = offset; + U8_FWD_N(s, end, -1, len); + + int actual_len = end - offset; + + uw_Basis_string r = uw_malloc(ctx, actual_len + 1); + memcpy(r, s + offset, actual_len); + r[actual_len] = 0; return r; } - } uw_Basis_string uw_Basis_str1(uw_context ctx, uw_Basis_char ch) { char *r; - - uw_check_heap(ctx, 2); + int req = U8_LENGTH(ch); + int offset = 0; + + uw_check_heap(ctx, req + 1); r = ctx->heap.front; - r[0] = ch; - r[1] = 0; - ctx->heap.front += 2; + U8_APPEND_UNSAFE(r, offset, ch); + r[req] = 0; - return r; + ctx->heap.front += req + 1; + return r; } uw_Basis_string uw_strdup(uw_context ctx, uw_Basis_string s1) { - int len = uw_Basis_strlen(ctx, s1) + 1; + int len = strlen(s1) + 1; char *s; uw_check_heap(ctx, len); @@ -2676,7 +2740,6 @@ uw_Basis_string uw_Basis_sqlifyString(uw_context ctx, uw_Basis_string s) { uw_Basis_string uw_Basis_sqlifyChar(uw_context ctx, uw_Basis_char c) { char *r, *s2; - uw_check_heap(ctx, 5 + uw_Estrings + strlen(uw_sqlsuffixChar)); r = s2 = ctx->heap.front; @@ -2934,10 +2997,7 @@ uw_Basis_string uw_Basis_floatToString(uw_context ctx, uw_Basis_float n) { } uw_Basis_string uw_Basis_charToString(uw_context ctx, uw_Basis_char ch) { - char *r = uw_malloc(ctx, 2); - r[0] = ch; - r[1] = 0; - return r; + return uw_Basis_str1(ctx, ch); } uw_Basis_string uw_Basis_boolToString(uw_context ctx, uw_Basis_bool b) { @@ -2997,11 +3057,12 @@ uw_Basis_char *uw_Basis_stringToChar(uw_context ctx, uw_Basis_string s) { uw_Basis_char *r = 
uw_malloc(ctx, 1); r[0] = 0; return r; - } else if (s[1] != 0) + } else if (uw_Basis_strlenGe(ctx, s, 2) == uw_Basis_True) return NULL; else { uw_Basis_char *r = uw_malloc(ctx, 1); - r[0] = s[0]; + int offset = 0; + U8_NEXT(s, offset, -1, *r); return r; } } @@ -3126,10 +3187,14 @@ uw_Basis_float uw_Basis_stringToFloat_error(uw_context ctx, uw_Basis_string s) { uw_Basis_char uw_Basis_stringToChar_error(uw_context ctx, uw_Basis_string s) { if (s[0] == 0) return 0; - else if (s[1] != 0) + else if (uw_Basis_strlenGe(ctx, s, 2) == uw_Basis_True) uw_error(ctx, FATAL, "Can't parse char: %s", uw_Basis_htmlifyString(ctx, s)); - else - return s[0]; + else { + uw_Basis_char c; + int offset = 0; + U8_NEXT(s, offset, -1, c); + return c; + } } uw_Basis_bool uw_Basis_stringToBool_error(uw_context ctx, uw_Basis_string s) { @@ -4328,82 +4393,82 @@ void uw_set_global(uw_context ctx, char *name, void *data, void (*free)(void*)) uw_Basis_bool uw_Basis_isalnum(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isalnum((int)c); + return !!u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM); } uw_Basis_bool uw_Basis_isalpha(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isalpha((int)c); + return !!u_hasBinaryProperty(c, UCHAR_ALPHABETIC); } uw_Basis_bool uw_Basis_isblank(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isblank((int)c); + return !!u_hasBinaryProperty(c, UCHAR_POSIX_BLANK); } uw_Basis_bool uw_Basis_iscntrl(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!iscntrl((int)c); + return !!(u_charType(c)==U_CONTROL_CHAR); } uw_Basis_bool uw_Basis_isdigit(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isdigit((int)c); + return !!u_isdigit(c); } uw_Basis_bool uw_Basis_isgraph(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isgraph((int)c); + return !!u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH); } uw_Basis_bool uw_Basis_islower(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!islower((int)c); + return !!u_hasBinaryProperty(c, 
UCHAR_LOWERCASE); } uw_Basis_bool uw_Basis_isprint(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isprint((int)c); + return !!u_hasBinaryProperty(c, UCHAR_POSIX_PRINT); } uw_Basis_bool uw_Basis_ispunct(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!ispunct((int)c); + return !!u_ispunct(c); } uw_Basis_bool uw_Basis_isspace(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isspace((int)c); + return !!u_hasBinaryProperty(c, UCHAR_WHITE_SPACE); } uw_Basis_bool uw_Basis_isupper(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isupper((int)c); + return !!u_hasBinaryProperty(c, UCHAR_UPPERCASE); } uw_Basis_bool uw_Basis_isxdigit(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isxdigit((int)c); + return !!u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT); } uw_Basis_char uw_Basis_tolower(uw_context ctx, uw_Basis_char c) { (void)ctx; - return tolower((int)c); + return u_tolower(c); } uw_Basis_char uw_Basis_toupper(uw_context ctx, uw_Basis_char c) { (void)ctx; - return toupper((int)c); + return u_toupper(c); } uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c) { (void)ctx; - return (unsigned char)c; + return (uw_Basis_int)c; } uw_Basis_char uw_Basis_chr(uw_context ctx, uw_Basis_int n) { (void)ctx; - return n; + return (uw_Basis_char)n; } uw_Basis_string uw_Basis_currentUrl(uw_context ctx) { @@ -4657,7 +4722,7 @@ uw_Basis_string uw_Basis_atom(uw_context ctx, uw_Basis_string s) { for (p = s; *p; ++p) { char c = *p; - if (!isalnum((int)c) && c != '+' && c != '-' && c != '.' && c != '%' && c != '#') + if (!U8_IS_SINGLE(c) && !isalnum((int)c) && c != '+' && c != '-' && c != '.' && c != '%' && c != '#') uw_error(ctx, FATAL, "Disallowed character in CSS atom"); } @@ -4669,7 +4734,7 @@ uw_Basis_string uw_Basis_css_url(uw_context ctx, uw_Basis_string s) { for (p = s; *p; ++p) { char c = *p; - if (!isalnum((int)c) && c != ':' && c != '/' && c != '.' 
&& c != '_' && c != '+' + if (!U8_IS_SINGLE(c) && !isalnum((int)c) && c != ':' && c != '/' && c != '.' && c != '_' && c != '+' && c != '-' && c != '%' && c != '?' && c != '&' && c != '=' && c != '#') uw_error(ctx, FATAL, "Disallowed character in CSS URL"); } @@ -4688,7 +4753,7 @@ uw_Basis_string uw_Basis_property(uw_context ctx, uw_Basis_string s) { for (p = s; *p; ++p) { char c = *p; - if (!islower((int)c) && !isdigit((int)c) && c != '_' && c != '-') + if (!U8_IS_SINGLE(c) && !islower((int)c) && !isdigit((int)c) && c != '_' && c != '-') uw_error(ctx, FATAL, "Disallowed character in CSS property"); } @@ -5064,7 +5129,7 @@ void uw_Sqlcache_flush(uw_context ctx, uw_Sqlcache_Cache *cache, char **keys) { pthread_rwlock_unlock(&cache->lockIn); } -int strcmp_nullsafe(const char *str1, const char *str2) { +int strcmp_nullsafe(const char *str1, const char *str2) { if (str1) return strcmp(str1, str2); else @@ -5073,7 +5138,7 @@ int strcmp_nullsafe(const char *str1, const char *str2) { static int is_valid_hash(uw_Basis_string hash) { for (; *hash; ++hash) - if (!isxdigit(*hash)) + if (!U8_IS_SINGLE(*hash) && !isxdigit(*hash)) return 0; return 1; diff --git a/src/compiler.sml b/src/compiler.sml index f724bf56..9ee88c9b 100644 --- a/src/compiler.sml +++ b/src/compiler.sml @@ -1585,9 +1585,9 @@ fun compileC {cname, oname, ename, libs, profile, debug, linker, link = link'} = val proto = Settings.currentProtocol () val lib = if Settings.getBootLinking () then - !Settings.configLib ^ "/" ^ #linkStatic proto ^ " " ^ !Settings.configLib ^ "/liburweb.a" + !Settings.configLib ^ "/" ^ #linkStatic proto ^ " " ^ !Settings.configLib ^ "/liburweb.a -licui18n -licuuc -licudata" else if Settings.getStaticLinking () then - " -static " ^ !Settings.configLib ^ "/" ^ #linkStatic proto ^ " " ^ !Settings.configLib ^ "/liburweb.a" + " -static " ^ !Settings.configLib ^ "/" ^ #linkStatic proto ^ " " ^ !Settings.configLib ^ "/liburweb.a -licui18n -licuuc -licudata" else "-L" ^ !Settings.configLib ^ " 
" ^ #linkDynamic proto ^ " -lurweb" diff --git a/tests/Makefile b/tests/Makefile index ecf5557b..03e37e4b 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -28,3 +28,5 @@ simple:: ./driver.sh fact ./driver.sh filter ./driver.sh jsbspace + ./driver.sh utf8 + diff --git a/tests/utf8.py b/tests/utf8.py new file mode 100644 index 00000000..ff9b737a --- /dev/null +++ b/tests/utf8.py @@ -0,0 +1,449 @@ +import unittest +import base + +class Suite(base.Base): + def test_1(self): + """Test case: substring (1)""" + self.start('Utf8/substrings') + + pre = self.xpath('pre[1]') + self.assertEqual('abc', pre.text) + + pre = self.xpath('pre[2]') + self.assertEqual('bc', pre.text) + + pre = self.xpath('pre[3]') + self.assertEqual('c', pre.text) + + pre = self.xpath('pre[4]') + self.assertEqual('ábó', pre.text) + + pre = self.xpath('pre[5]') + self.assertEqual('bó', pre.text) + + pre = self.xpath('pre[6]') + self.assertEqual('ó', pre.text) + + pre = self.xpath('pre[7]') + self.assertEqual('çãó', pre.text) + + pre = self.xpath('pre[8]') + self.assertEqual('ãó', pre.text) + + pre = self.xpath('pre[9]') + self.assertEqual('ó', pre.text) + + pre = self.xpath('pre[10]') + self.assertEqual('', pre.text) + + pre = self.xpath('pre[11]') + self.assertEqual('', pre.text) + + + def test_2(self): + """Test case: strlen (2)""" + self.start('Utf8/strlens') + + pre = self.xpath('pre[1]') + self.assertEqual('3', pre.text) + + pre = self.xpath('pre[2]') + self.assertEqual('3', pre.text) + + pre = self.xpath('pre[3]') + self.assertEqual('3', pre.text) + + pre = self.xpath('pre[4]') + self.assertEqual('3', pre.text) + + pre = self.xpath('pre[5]') + self.assertEqual('1', pre.text) + + pre = self.xpath('pre[6]') + self.assertEqual('1', pre.text) + + pre = self.xpath('pre[7]') + self.assertEqual('0', pre.text) + + pre = self.xpath('pre[8]') + self.assertEqual('1', pre.text) + + pre = self.xpath('pre[9]') + self.assertEqual('1', pre.text) + + pre = self.xpath('pre[10]') + self.assertEqual('1', 
pre.text) + + pre = self.xpath('pre[11]') + self.assertEqual('6', pre.text) + + pre = self.xpath('pre[12]') + self.assertEqual('2', pre.text) + + pre = self.xpath('pre[13]') + self.assertEqual('14', pre.text) + + + def test_3(self): + """Test case: strlenGe (3)""" + self.start('Utf8/strlenGens') + + pre = self.xpath('pre[1]') + self.assertEqual('False', pre.text) + + pre = self.xpath('pre[2]') + self.assertEqual('True', pre.text) + + pre = self.xpath('pre[3]') + self.assertEqual('False', pre.text) + + pre = self.xpath('pre[4]') + self.assertEqual('True', pre.text) + + pre = self.xpath('pre[5]') + self.assertEqual('True', pre.text) + + pre = self.xpath('pre[6]') + self.assertEqual('False', pre.text) + + pre = self.xpath('pre[7]') + self.assertEqual('True', pre.text) + + pre = self.xpath('pre[8]') + self.assertEqual('True', pre.text) + + def test_4(self): + """Test case: strcat (4)""" + self.start('Utf8/strcats') + + pre = self.xpath('pre[1]') + self.assertEqual('', pre.text) + + pre = self.xpath('pre[2]') + self.assertEqual('0', pre.text) + + pre = self.xpath('pre[3]') + self.assertEqual('aabb', pre.text) + + pre = self.xpath('pre[4]') + self.assertEqual('4', pre.text) + + pre = self.xpath('pre[5]') + self.assertEqual('bb', pre.text) + + pre = self.xpath('pre[6]') + self.assertEqual('2', pre.text) + + pre = self.xpath('pre[7]') + self.assertEqual('aa', pre.text) + + pre = self.xpath('pre[8]') + self.assertEqual('2', pre.text) + + pre = self.xpath('pre[9]') + self.assertEqual('ààáá', pre.text) + + pre = self.xpath('pre[10]') + self.assertEqual('4', pre.text) + + pre = self.xpath('pre[11]') + self.assertEqual('áá', pre.text) + + pre = self.xpath('pre[12]') + self.assertEqual('2', pre.text) + + pre = self.xpath('pre[13]') + self.assertEqual('àà', pre.text) + + pre = self.xpath('pre[14]') + self.assertEqual('2', pre.text) + + def test_5(self): + """Test case: strsub (5)""" + self.start('Utf8/strsubs') + + pre = self.xpath('pre[1]') + self.assertEqual('a', pre.text) + + 
pre = self.xpath('pre[2]') + self.assertEqual('b', pre.text) + + pre = self.xpath('pre[3]') + self.assertEqual('à', pre.text) + + pre = self.xpath('pre[4]') + self.assertEqual('ç', pre.text) + + def test_6(self): + """Test case: strsuffix (6)""" + self.start('Utf8/strsuffixs') + + pre = self.xpath('pre[1]') + self.assertEqual('abàç', pre.text) + + pre = self.xpath('pre[2]') + self.assertEqual('bàç', pre.text) + + pre = self.xpath('pre[3]') + self.assertEqual('àç', pre.text) + + pre = self.xpath('pre[4]') + self.assertEqual('ç', pre.text) + + def test_7(self): + """Test case: strchr (7)""" + self.start('Utf8/strchrs') + + pre = self.xpath('pre[1]') + self.assertEqual('None', pre.text) + + pre = self.xpath('pre[2]') + self.assertEqual('Some "bàç"', pre.text) + + pre = self.xpath('pre[3]') + self.assertEqual('Some "àç"', pre.text) + + pre = self.xpath('pre[4]') + self.assertEqual('Some "ç"', pre.text) + + pre = self.xpath('pre[5]') + self.assertEqual('Some ""', pre.text) + + def test_8(self): + """Test case: strindex (8)""" + self.start('Utf8/strindexs') + + pre = self.xpath('pre[1]') + self.assertEqual('None', pre.text) + + pre = self.xpath('pre[2]') + self.assertEqual('Some 0', pre.text) + + pre = self.xpath('pre[3]') + self.assertEqual('Some 1', pre.text) + + pre = self.xpath('pre[4]') + self.assertEqual('Some 2', pre.text) + + pre = self.xpath('pre[5]') + self.assertEqual('Some 3', pre.text) + + def test_9(self): + """Test case: strindex (9)""" + self.start('Utf8/strsindexs') + + pre = self.xpath('pre[1]') + # behavior of strstr C function + self.assertEqual('Some 0', pre.text) + + pre = self.xpath('pre[2]') + self.assertEqual('Some 0', pre.text) + + pre = self.xpath('pre[3]') + self.assertEqual('None', pre.text) + + pre = self.xpath('pre[4]') + self.assertEqual('Some 1', pre.text) + + pre = self.xpath('pre[5]') + self.assertEqual('None', pre.text) + + pre = self.xpath('pre[6]') + self.assertEqual('Some 2', pre.text) + + pre = self.xpath('pre[7]') + 
self.assertEqual('None', pre.text) + + pre = self.xpath('pre[8]') + self.assertEqual('None', pre.text) + + pre = self.xpath('pre[9]') + self.assertEqual('Some 3', pre.text) + + def test_10(self): + """Test case: strcspn (10)""" + self.start('Utf8/strcspns') + + pre = self.xpath('pre[1]') + self.assertEqual('4', pre.text) + + pre = self.xpath('pre[2]') + self.assertEqual('0', pre.text) + + pre = self.xpath('pre[3]') + self.assertEqual('0', pre.text) + + pre = self.xpath('pre[4]') + self.assertEqual('1', pre.text) + + pre = self.xpath('pre[5]') + self.assertEqual('2', pre.text) + + pre = self.xpath('pre[6]') + self.assertEqual('3', pre.text) + + def test_11(self): + """Test case: str1 (11)""" + self.start('Utf8/str1s') + + pre = self.xpath('pre[1]') + self.assertEqual('a', pre.text) + + pre = self.xpath('pre[2]') + self.assertEqual('à', pre.text) + + pre = self.xpath('pre[3]') + self.assertEqual('á', pre.text) + + def test_12(self): + """Test case: isalnum (12)""" + self.start('Utf8/isalnums') + + for idx in range(1, 9): + pre = self.xpath('pre[' + str(idx) + ']') + self.assertEqual('True', pre.text, 'Failed isalnum: assert ' + str(idx)) + + def test_13(self): + """Test case: isalpha (13)""" + self.start('Utf8/isalphas') + + for idx in range(1, 9): + pre = self.xpath('pre[' + str(idx) + ']') + self.assertEqual('True', pre.text, 'Failed isalpha: assert ' + str(idx)) + + def test_14(self): + """Test case: isblank (14)""" + self.start('Utf8/isblanks') + + for idx in range(1, 11): + pre = self.xpath('pre[' + str(idx) + ']') + self.assertEqual('True', pre.text, 'Failed isblank: assert ' + str(idx)) + + def test_15(self): + """Test case: iscntrl (15)""" + self.start('Utf8/iscntrls') + + for idx in range(1, 11): + pre = self.xpath('pre[' + str(idx) + ']') + self.assertEqual('True', pre.text, 'Failed iscntrl: assert ' + str(idx)) + + def test_16(self): + """Test case: isdigit (16)""" + self.start('Utf8/isdigits') + + for idx in range(1, 11): + pre = self.xpath('pre[' + 
str(idx) + ']') + self.assertEqual('True', pre.text, 'Failed isdigit: assert ' + str(idx)) + + + def test_17(self): + """Test case: isgraph (17)""" + self.start('Utf8/isgraphs') + + for idx in range(1, 11): + pre = self.xpath('pre[' + str(idx) + ']') + self.assertEqual('True', pre.text, 'Failed isgraph: assert ' + str(idx)) + + def test_18(self): + """Test case: islower (18)""" + self.start('Utf8/islowers') + + for idx in range(1, 11): + pre = self.xpath('pre[' + str(idx) + ']') + self.assertEqual('True', pre.text, 'Failed islower: assert ' + str(idx)) + + def test_19(self): + """Test case: isprint (19)""" + self.start('Utf8/isprints') + + for idx in range(1, 11): + pre = self.xpath('pre[' + str(idx) + ']') + self.assertEqual('True', pre.text, 'Failed isprint: assert ' + str(idx)) + + def test_20(self): + """Test case: ispunct (20)""" + self.start('Utf8/ispuncts') + + for idx in range(1, 11): + pre = self.xpath('pre[' + str(idx) + ']') + self.assertEqual('True', pre.text, 'Failed ispunct: assert ' + str(idx)) + + def test_21(self): + """Test case: isspace (21)""" + self.start('Utf8/isspaces') + + for idx in range(1, 11): + pre = self.xpath('pre[' + str(idx) + ']') + self.assertEqual('True', pre.text, 'Failed isspace: assert ' + str(idx)) + + def test_22(self): + """Test case: isupper (22)""" + self.start('Utf8/isuppers') + + for idx in range(1, 11): + pre = self.xpath('pre[' + str(idx) + ']') + self.assertEqual('True', pre.text, 'Failed isupper: assert ' + str(idx)) + + def test_23(self): + """Test case: isxdigit (23)""" + self.start('Utf8/isxdigits') + + for idx in range(1, 11): + pre = self.xpath('pre[' + str(idx) + ']') + self.assertEqual('True', pre.text, 'Failed isxdigit: assert ' + str(idx)) + + def test_24(self): + """Test case: toupper (24)""" + self.start('Utf8/touppers') + + for idx in range(1, 6): + pre = self.xpath('pre[' + str(idx) + ']') + self.assertEqual('True', pre.text, 'Failed toupper: assert ' + str(idx)) + + def test_25(self): + """Test case: 
ord (25)""" + self.start('Utf8/ord_and_chrs') + + for idx in range(1, 8): + pre = self.xpath('pre[' + str(idx) + ']') + self.assertEqual('True', pre.text, 'Failed ord: assert ' + str(idx)) + + def test_26 (self): + """Test case: test_db (26) """ + self.start('Utf8/test_db') + + pre = self.xpath('pre[1]') + self.assertEqual('abc', pre.text) + + pre = self.xpath('pre[2]') + self.assertEqual('3', pre.text) + + pre = self.xpath('pre[3]') + self.assertEqual('çãó', pre.text) + + pre = self.xpath('pre[4]') + self.assertEqual('3', pre.text) + + pre = self.xpath('pre[5]') + self.assertEqual('が', pre.text) + + pre = self.xpath('pre[6]') + self.assertEqual('1', pre.text) + + pre = self.xpath('pre[7]') + self.assertEqual('漢', pre.text) + + pre = self.xpath('pre[8]') + self.assertEqual('1', pre.text) + + pre = self.xpath('pre[9]') + self.assertEqual('カ', pre.text) + + pre = self.xpath('pre[10]') + self.assertEqual('1', pre.text) + + pre = self.xpath('pre[11]') + self.assertEqual('وظيفية', pre.text) + + pre = self.xpath('pre[12]') + self.assertEqual('6', pre.text) diff --git a/tests/utf8.ur b/tests/utf8.ur new file mode 100644 index 00000000..0dedc726 --- /dev/null +++ b/tests/utf8.ur @@ -0,0 +1,431 @@ +fun substrings () : transaction page = return + +
{[substring "abc" 0 3]}
+
{[substring "abc" 1 2]}
+
{[substring "abc" 2 1]}
+
{[substring "ábó" 0 3]}
+
{[substring "ábó" 1 2]}
+
{[substring "ábó" 2 1]}
+
{[substring "çãó" 0 3]}
+
{[substring "çãó" 1 2]}
+
{[substring "çãó" 2 1]}
+
{[substring "çãó" 2 0]}
+
{[substring "" 0 0]}
+ +
+ +fun strlens () : transaction page = return + +
{[strlen "abc"]}
+
{[strlen "çbc"]}
+
{[strlen "çãc"]}
+
{[strlen "çãó"]}
+
{[strlen "ç"]}
+
{[strlen "c"]}
+
{[strlen ""]}
+
{[strlen "が"]}
+
{[strlen "漢"]}
+
{[strlen "カ"]}
+
{[strlen "وظيفية"]}
+
{[strlen "函數"]}
+
{[strlen "Функциональное"]}
+ +
+ +fun strlenGens () : transaction page = return + +
{[strlenGe "" 1]}
+
{[strlenGe "" 0]}
+
{[strlenGe "aba" 4]}
+
{[strlenGe "aba" 3]}
+
{[strlenGe "aba" 2]}
+
{[strlenGe "áçà" 4]}
+
{[strlenGe "áçà" 3]}
+
{[strlenGe "áçà" 2]}
+ + +
+ +fun strcats () : transaction page = + let + fun catAndLen a b = + +
{[strcat a b]}
+
{[strlen (strcat a b)]}
+
+ in + return + + {catAndLen "" ""} + {catAndLen "aa" "bb"} + {catAndLen "" "bb"} + {catAndLen "aa" ""} + {catAndLen "àà" "áá"} + {catAndLen "" "áá"} + {catAndLen "àà" ""} + + +end + +fun strsubs () : transaction page = + return + +
{[strsub "abàç" 0]}
+
{[strsub "abàç" 1]}
+
{[strsub "abàç" 2]}
+
{[strsub "abàç" 3]}
+ +
+ +fun strsuffixs () : transaction page = + return + +
{[strsuffix "abàç" 0]}
+
{[strsuffix "abàç" 1]}
+
{[strsuffix "abàç" 2]}
+
{[strsuffix "abàç" 3]}
+ +
+ +fun strchrs () : transaction page = + let + fun optToStr ms = + case ms of + None => "None" + | Some s => "Some \"" ^ s ^ "\"" + + in + return + +
{[optToStr (strchr "abàç" #"c")]}
+
{[optToStr (strchr "abàç" #"a")]}
+
{[optToStr (strchr "abàç" #"b")]}
+
{[optToStr (strchr "abàç" (strsub "à" 0))]}
+
{[optToStr (strchr "abàç" (strsub "ç" 0))]}
+ +
+ end + +fun strindexs () : transaction page = + let + fun optToStr ms = + case ms of + None => "None" + | Some s => "Some " ^ (show s) + + in + return + +
{[optToStr (strindex "abàç" #"c")]}
+
{[optToStr (strindex "abàç" #"a")]}
+
{[optToStr (strindex "abàç" #"b")]}
+
{[optToStr (strindex "abàç" (strsub "à" 0))]}
+
{[optToStr (strindex "abàç" (strsub "ç" 0))]}
+ +
+ end + +fun strsindexs () : transaction page = + let + fun optToStr ms = + case ms of + None => "None" + | Some s => "Some " ^ (show s) + + in + return + +
{[optToStr (strsindex "abàç" "")]}
+
{[optToStr (strsindex "abàç" "abàç")]}
+
{[optToStr (strsindex "abàç" "abàc")]}
+
{[optToStr (strsindex "abàç" "bàç")]}
+
{[optToStr (strsindex "abàç" "bàc")]}
+
{[optToStr (strsindex "abàç" "àç")]}
+
{[optToStr (strsindex "abàç" "àc")]}
+
{[optToStr (strsindex "abàç" "ac")]}
+
{[optToStr (strsindex "abàç" "ç")]}
+ +
+ end + +fun strcspns () : transaction page = + return + +
{[strcspn "abàç" ""]}
+
{[strcspn "abàç" "abàç"]}
+
{[strcspn "abàç" "a"]}
+
{[strcspn "abàç" "bàç"]}
+
{[strcspn "abàç" "àç"]}
+
{[strcspn "abàç" "ç"]}
+ +
+ +fun str1s () : transaction page = return + +
{[str1 #"a"]}
+
{[str1 (strsub "à" 0)]}
+
{[str1 (strsub "aá" 1)]}
+ +
+ +fun isalnums () : transaction page = return + +
{[isalnum #"a"]}
+
{[isalnum (strsub "à" 0)]}
+
{[isalnum #"A"]}
+
{[isalnum (strsub "À" 0)]}
+
{[isalnum #"1"]}
+
{[not (isalnum #"!")]}
+
{[not (isalnum #"#")]}
+
{[not (isalnum #" ")]}
+ +
+ +fun isalphas () : transaction page = return + +
{[isalpha #"a"]}
+
{[isalpha (strsub "à" 0)]}
+
{[isalpha #"A"]}
+
{[isalpha (strsub "À" 0)]}
+
{[not (isalpha #"1")]}
+
{[not (isalpha #"!")]}
+
{[not (isalpha #"#")]}
+
{[not (isalpha #" ")]}
+ +
+ +fun isblanks () : transaction page = + return + +
{[not (isblank #"a")]}
+
{[not (isblank (strsub "à" 0))]}
+
{[not (isblank #"A")]}
+
{[not (isblank (strsub "À" 0))]}
+
{[not (isblank #"1")]}
+
{[not (isblank #"!")]}
+
{[not (isblank #"#")]}
+
{[isblank #" "]}
+
{[isblank #"\t"]}
+
{[not (isblank #"\n")]}
+ +
+ +fun iscntrls () : transaction page = + return + +
{[not (iscntrl #"a")]}
+
{[not (iscntrl (strsub "à" 0))]}
+
{[not (iscntrl #"A")]}
+
{[not (iscntrl (strsub "À" 0))]}
+
{[not (iscntrl #"1")]}
+
{[not (iscntrl #"!")]}
+
{[not (iscntrl #"#")]}
+
{[not (iscntrl #" ")]}
+
{[iscntrl #"\t"]}
+
{[iscntrl #"\n"]}
+ +
+ +fun isdigits () : transaction page = + return + +
{[not (isdigit #"a")]}
+
{[not (isdigit (strsub "à" 0))]}
+
{[not (isdigit #"A")]}
+
{[not (isdigit (strsub "À" 0))]}
+
{[isdigit #"1"]}
+
{[not (isdigit #"!")]}
+
{[not (isdigit #"#")]}
+
{[not (isdigit #" ")]}
+
{[not (isdigit #"\t")]}
+
{[not (isdigit #"\n")]}
+ +
+ +fun isgraphs () : transaction page = + return + +
{[isgraph #"a"]}
+
{[isgraph (strsub "à" 0)]}
+
{[isgraph #"A"]}
+
{[isgraph (strsub "À" 0)]}
+
{[isgraph #"1"]}
+
{[isgraph #"!"]}
+
{[isgraph #"#"]}
+
{[not (isgraph #" ")]}
+
{[not (isgraph #"\t")]}
+
{[not (isdigit #"\n")]}
+ +
+ +fun islowers () : transaction page = + return + +
{[islower #"a"]}
+
{[islower (strsub "à" 0)]}
+
{[not (islower #"A")]}
+
{[not (islower (strsub "À" 0))]}
+
{[not (islower #"1")]}
+
{[not (islower #"!")]}
+
{[not (islower #"#")]}
+
{[not (islower #" ")]}
+
{[not (islower #"\t")]}
+
{[not (islower #"\n")]}
+ +
+ +fun isprints () : transaction page = + return + +
{[isprint #"a"]}
+
{[isprint (strsub "à" 0)]}
+
{[isprint #"A"]}
+
{[isprint (strsub "À" 0)]}
+
{[isprint #"1"]}
+
{[isprint #"!"]}
+
{[isprint #"#"]}
+
{[isprint #" "]}
+
{[not (isprint #"\t")]}
+
{[not (isprint #"\n")]}
+ +
+ +fun ispuncts () : transaction page = + return + +
{[not (ispunct #"a")]}
+
{[not (ispunct (strsub "à" 0))]}
+
{[not (ispunct #"A")]}
+
{[not (ispunct (strsub "À" 0))]}
+
{[not (ispunct #"1")]}
+
{[ispunct #"!"]}
+
{[ispunct #"#"]}
+
{[not (ispunct #" ")]}
+
{[not (isprint #"\t")]}
+
{[not (isprint #"\n")]}
+ +
+ +fun isspaces () : transaction page = + return + +
{[not (isspace #"a")]}
+
{[not (isspace (strsub "à" 0))]}
+
{[not (isspace #"A")]}
+
{[not (isspace (strsub "À" 0))]}
+
{[not (isspace #"1")]}
+
{[not (isspace #"!")]}
+
{[not (isspace #"#")]}
+
{[isspace #" "]}
+
{[isspace #"\t"]}
+
{[isspace #"\n"]}
+ +
+ +fun isuppers () : transaction page = + return + +
{[not (isupper #"a")]}
+
{[not (isupper (strsub "à" 0))]}
+
{[isupper #"A"]}
+
{[isupper (strsub "À" 0)]}
+
{[not (isupper #"1")]}
+
{[not (isupper #"!")]}
+
{[not (isupper #"#")]}
+
{[not (isupper #" ")]}
+
{[not (isupper #"\t")]}
+
{[not (isupper #"\n")]}
+ +
+ +fun isxdigits () : transaction page = + return + +
{[isxdigit #"a"]}
+
{[not (isxdigit (strsub "à" 0))]}
+
{[isxdigit #"A"]}
+
{[not (isxdigit (strsub "À" 0))]}
+
{[isxdigit #"1"]}
+
{[not (isxdigit #"!")]}
+
{[not (isxdigit #"#")]}
+
{[not (isxdigit #" ")]}
+
{[not (isxdigit #"\t")]}
+
{[not (isxdigit #"\n")]}
+ +
+ +fun tolowers () : transaction page = + return + +
{[tolower #"A" = #"a"]}
+
{[tolower #"a" = #"a"]}
+
{[tolower (strsub "á" 0) = (strsub "á" 0)]}
+
{[tolower (strsub "Á" 0) = (strsub "á" 0)]}
+
{[tolower #"1" = #"1"]}
+ +
+ +fun touppers () : transaction page = + return + +
{[toupper #"A" = #"A"]}
+
{[toupper #"a" = #"A"]}
+
{[toupper (strsub "á" 0) = (strsub "Á" 0)]}
+
{[toupper (strsub "Á" 0) = (strsub "Á" 0)]}
+
{[toupper #"1" = #"1"]}
+ +
+ +fun ord_and_chrs () : transaction page = + return + +
{[chr (ord #"A") = #"A"]}
+
{[chr (ord #"a") = #"a"]}
+
{[chr (ord (strsub "á" 0)) = (strsub "á" 0)]}
+
{[chr (ord (strsub "Á" 0)) = (strsub "Á" 0)]}
+
{[chr (ord #"1") = #"1"]}
+
{[chr (ord #"\n") = #"\n"]}
+
{[chr (ord (strsub "が" 0)) = (strsub "が" 0)]}
+
{[chr (ord (strsub "漢" 0)) = (strsub "漢" 0)]}
+
{[chr (ord (strsub "カ" 0)) = (strsub "カ" 0)]}
+ +
+ +table t : { Id : int, Text : string } + + +fun test_db () : transaction page = + dml (INSERT INTO t (Id, Text) VALUES({[1]}, {["abc"]})); + t1 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 1); + + dml (INSERT INTO t (Id, Text) VALUES({[2]}, {["çãó"]})); + t2 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 2); + + dml (INSERT INTO t (Id, Text) VALUES({[3]}, {["が"]})); + t3 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 3); + + dml (INSERT INTO t (Id, Text) VALUES({[4]}, {["漢"]})); + t4 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 4); + + dml (INSERT INTO t (Id, Text) VALUES({[5]}, {["カ"]})); + t5 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 5); + + dml (INSERT INTO t (Id, Text) VALUES({[6]}, {["وظيفية"]})); + t6 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 6); + + return + +
{[t1.T.Text]}
+
{[strlen t1.T.Text]}
+
{[t2.T.Text]}
+
{[strlen t2.T.Text]}
+
{[t3.T.Text]}
+
{[strlen t3.T.Text]}
+
{[t4.T.Text]}
+
{[strlen t4.T.Text]}
+
{[t5.T.Text]}
+
{[strlen t5.T.Text]}
+
{[t6.T.Text]}
+
{[strlen t6.T.Text]}
+ +
diff --git a/tests/utf8.urp b/tests/utf8.urp new file mode 100644 index 00000000..9b3067af --- /dev/null +++ b/tests/utf8.urp @@ -0,0 +1,5 @@ +database dbname=utf8 +sql utf8.sql +safeGet Utf8/test_db + +utf8 \ No newline at end of file -- cgit v1.2.3 From 5cc729b48aad084757a049b7e5cdbadae5e9e400 Mon Sep 17 00:00:00 2001 From: fab Date: Fri, 30 Nov 2018 23:29:14 +0000 Subject: reject invalid codepoints. Basis.iscodepoint. fix german char in js --- include/urweb/urweb_cpp.h | 5 +- lib/js/urweb.js | 7 +- lib/ur/basis.urs | 2 + src/c/urweb.c | 265 +++++++++++++++++++++++++++------------------- 4 files changed, 168 insertions(+), 111 deletions(-) (limited to 'include/urweb') diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h index 5f1144b8..25f97fb3 100644 --- a/include/urweb/urweb_cpp.h +++ b/include/urweb/urweb_cpp.h @@ -103,7 +103,7 @@ char *uw_Basis_htmlifyFloat(struct uw_context *, uw_Basis_float); char *uw_Basis_htmlifyString(struct uw_context *, uw_Basis_string); char *uw_Basis_htmlifyBool(struct uw_context *, uw_Basis_bool); char *uw_Basis_htmlifyTime(struct uw_context *, uw_Basis_time); -char *uw_Basis_htmlifySpecialChar(struct uw_context *, unsigned char); +char *uw_Basis_htmlifySpecialChar(struct uw_context *, uw_Basis_char); char *uw_Basis_htmlifySource(struct uw_context *, uw_Basis_source); uw_unit uw_Basis_htmlifyInt_w(struct uw_context *, uw_Basis_int); @@ -111,7 +111,7 @@ uw_unit uw_Basis_htmlifyFloat_w(struct uw_context *, uw_Basis_float); uw_unit uw_Basis_htmlifyString_w(struct uw_context *, uw_Basis_string); uw_unit uw_Basis_htmlifyBool_w(struct uw_context *, uw_Basis_bool); uw_unit uw_Basis_htmlifyTime_w(struct uw_context *, uw_Basis_time); -uw_unit uw_Basis_htmlifySpecialChar_w(struct uw_context *, unsigned char); +uw_unit uw_Basis_htmlifySpecialChar_w(struct uw_context *, uw_Basis_char); uw_unit uw_Basis_htmlifySource_w(struct uw_context *, uw_Basis_source); char *uw_Basis_attrifyInt(struct uw_context *, uw_Basis_int); @@ -327,6 
+327,7 @@ uw_Basis_bool uw_Basis_isxdigit(struct uw_context *, uw_Basis_char); uw_Basis_char uw_Basis_tolower(struct uw_context *, uw_Basis_char); uw_Basis_char uw_Basis_toupper(struct uw_context *, uw_Basis_char); +uw_Basis_bool uw_Basis_iscodepoint(struct uw_context *, uw_Basis_int); uw_Basis_int uw_Basis_ord(struct uw_context *, uw_Basis_char); uw_Basis_char uw_Basis_chr(struct uw_context *, uw_Basis_int); diff --git a/lib/js/urweb.js b/lib/js/urweb.js index de1a2ad0..c7725e28 100644 --- a/lib/js/urweb.js +++ b/lib/js/urweb.js @@ -38,7 +38,12 @@ function isXdigit(c) { return isDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' function ord(c) { return c.charCodeAt(0); } function isPrint(c) { return ord(c) > 31 && ord(c) != 127; } function toLower(c) { return c.toLowerCase(); } -function toUpper(c) { return c.toUpperCase(); } +function toUpper(c) { + if (ord(c) == 223) + return c; + else + return c.toUpperCase(); +} // Lists diff --git a/lib/ur/basis.urs b/lib/ur/basis.urs index 878f2793..c9d6556b 100644 --- a/lib/ur/basis.urs +++ b/lib/ur/basis.urs @@ -79,6 +79,8 @@ val toupper : char -> char val ord : char -> int val chr : int -> char +val iscodepoint : int -> bool + (** String operations *) val strlen : string -> int diff --git a/src/c/urweb.c b/src/c/urweb.c index be65afcc..195ddada 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -1559,101 +1559,89 @@ const char *uw_Basis_get_settings(uw_context ctx, uw_unit u) { } } -uw_Basis_string uw_Basis_jsifyString(uw_context ctx, uw_Basis_string s) { - char *r, *s2; - - uw_check_heap(ctx, strlen(s) * 4 + 3); - - r = s2 = ctx->heap.front; - *s2++ = '"'; - - for (; *s; s++) { - unsigned char c = *s; - - switch (c) { - case '"': - strcpy(s2, "\\\""); - s2 += 2; - break; - case '\'': - strcpy(s2, "\\047"); - s2 += 4; - break; - case '\\': - strcpy(s2, "\\\\"); - s2 += 2; - break; - case '<': - strcpy(s2, "\\074"); - s2 += 4; - break; - case '&': - strcpy(s2, "\\046"); - s2 += 4; - break; - default: - if (isprint((int)c) || 
c >= 128) - *s2++ = c; - else { - sprintf(s2, "\\%03o", c); - s2 += 4; - } - } - } - - strcpy(s2, "\""); - ctx->heap.front = s2 + 2; - return r; -} - uw_Basis_bool uw_Basis_isprint(uw_context ctx, uw_Basis_char ch); - -uw_Basis_string uw_Basis_jsifyChar(uw_context ctx, uw_Basis_char c1) { - char *r, *s2; - - uw_check_heap(ctx, 7); - - r = s2 = ctx->heap.front; - *s2++ = '"'; - +void jsifyChar(char**buffer_ptr, uw_context ctx, uw_Basis_char c1) { + char* buffer = *buffer_ptr; + switch (c1) { case '"': - strcpy(s2, "\\\""); - s2 += 2; + strcpy(buffer, "\\\""); + buffer += 2; break; case '\'': - strcpy(s2, "\\047"); - s2 += 4; + strcpy(buffer, "\\047"); + buffer += 4; break; case '\\': - strcpy(s2, "\\\\"); - s2 += 2; + strcpy(buffer, "\\\\"); + buffer += 2; break; case '<': - strcpy(s2, "\\074"); - s2 += 4; + strcpy(buffer, "\\074"); + buffer += 4; break; case '&': - strcpy(s2, "\\046"); - s2 += 4; + strcpy(buffer, "\\046"); + buffer += 4; break; default: if (uw_Basis_isprint(ctx, c1) == uw_Basis_True) { int offset = 0; - U8_APPEND_UNSAFE(s2, offset, c1); - s2 += offset; + U8_APPEND_UNSAFE(buffer, offset, c1); + buffer += offset; } else { - assert(0777 >= c1); - sprintf(s2, "\\%03o", (unsigned char)c1); - s2 += 4; + assert(65536 > c1); + sprintf(buffer, "\\u%04x", (unsigned int)c1); /* keep all 16 bits: an unsigned char cast dropped the high byte */ + buffer += 6; } } + + *buffer_ptr = buffer; +} + +uw_Basis_string uw_Basis_jsifyString(uw_context ctx, uw_Basis_string s) { + char *r, *s2; + uw_Basis_char c; + + uw_check_heap(ctx, strlen(s) * 6 + 3); + + r = s2 = ctx->heap.front; + *s2++ = '"'; + + int offset = 0; + while(s[offset] != 0) + { + U8_NEXT(s, offset, -1, c); + + jsifyChar(&s2, ctx, c); + } + strcpy(s2, "\""); ctx->heap.front = s2 + 2; + + return r; +} + +uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c); + +uw_Basis_string uw_Basis_jsifyChar(uw_context ctx, uw_Basis_char c1) { + char *r, *s2; + + uw_check_heap(ctx, 9); /* worst case: quote + six-character escape + quote + NUL */ + + r = s2 = ctx->heap.front; + + *s2++ = '"'; + + jsifyChar(&s2, ctx, c1); + + strcpy(s2,
"\""); + ctx->heap.front = s2 + 2; + return r; } @@ -1697,6 +1685,7 @@ uw_Basis_string uw_Basis_jsifyString_ws(uw_context ctx, uw_Basis_string s) { strcpy(s2, "\""); ctx->script.front = s2 + 1; + return r; } @@ -2262,25 +2251,27 @@ uw_unit uw_Basis_htmlifyInt_w(uw_context ctx, uw_Basis_int n) { return uw_unit_v; } -char *uw_Basis_htmlifySpecialChar(uw_context ctx, unsigned char ch) { +char *uw_Basis_htmlifySpecialChar(uw_context ctx, uw_Basis_char ch) { unsigned int n = ch; int len; char *r; - uw_check_heap(ctx, INTS_MAX+3); + uw_check_heap(ctx, INTS_MAX+3 + 1); r = ctx->heap.front; - sprintf(r, "&#%u;%n", n, &len); + len = sprintf(r, "&#%u;", n); ctx->heap.front += len+1; + return r; } -uw_unit uw_Basis_htmlifySpecialChar_w(uw_context ctx, unsigned char ch) { +uw_unit uw_Basis_htmlifySpecialChar_w(uw_context ctx, uw_Basis_char ch) { unsigned int n = ch; int len; uw_check(ctx, INTS_MAX+3); - sprintf(ctx->page.front, "&#%u;%n", n, &len); + len = sprintf(ctx->page.front, "&#%u;", n); ctx->page.front += len; + return uw_unit_v; } @@ -2328,48 +2319,69 @@ uw_unit uw_Basis_jsifyInt_w(uw_context ctx, uw_Basis_int n) { char *uw_Basis_htmlifyString(uw_context ctx, const char *s) { char *r, *s2; + uw_Basis_char c1; + int offset = 0, len = 0; + + uw_check_heap(ctx, strlen(s) * (INTS_MAX + 3) + 1); - uw_check_heap(ctx, strlen(s) * 5 + 1); - - for (r = s2 = ctx->heap.front; *s; s++) { - unsigned char c = *s; - - switch (c) { - case '<': - strcpy(s2, "<"); - s2 += 4; - break; - case '&': - strcpy(s2, "&"); - s2 += 5; - break; - default: - *s2++ = c; + r = s2 = ctx->heap.front; + + while (s[offset] != 0) { + + U8_NEXT(s, offset, -1, c1); + + + if (U8_IS_SINGLE(c1) && uw_Basis_isprint(ctx, c1)) { + switch (c1) { + case '<': + strcpy(s2, "<"); + s2 += 4; + break; + case '&': + strcpy(s2, "&"); + s2 += 5; + break; + default: + *s2++ = c1; + } + } else { + len = sprintf(s2, "&#%u;", c1); + s2 += len; } } - + *s2++ = 0; ctx->heap.front = s2; + return r; } uw_unit 
uw_Basis_htmlifyString_w(uw_context ctx, uw_Basis_string s) { uw_check(ctx, strlen(s) * 6); - - for (; *s; s++) { - unsigned char c = *s; - - switch (c) { - case '<': - uw_write_unsafe(ctx, "<"); - break; - case '&': - uw_write_unsafe(ctx, "&"); - break; - default: - uw_writec_unsafe(ctx, c); + int offset = 0; + uw_Basis_char c1; + + while(s[offset] != 0){ + + U8_NEXT(s, offset, -1, c1); + + if (U8_IS_SINGLE(c1) && uw_Basis_isprint(ctx, c1)) { + + switch (c1) { + case '<': + uw_write_unsafe(ctx, "<"); + break; + case '&': + uw_write_unsafe(ctx, "&"); + break; + default: + uw_writec_unsafe(ctx, c1); + } } - } + else { + uw_Basis_htmlifySpecialChar_w(ctx, c1); + } + } return uw_unit_v; } @@ -4474,9 +4486,46 @@ uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c) { return (uw_Basis_int)c; } +uw_Basis_bool uw_Basis_iscodepoint (uw_context ctx, uw_Basis_int n) { + (void)ctx; + uw_Basis_char ch = (uw_Basis_char)n; + + if (UCHAR_MIN_VALUE <= ch && UCHAR_MAX_VALUE > ch) { + + if (U8_LENGTH(ch) == 0) { + return uw_Basis_False; + } + + if (u_charType(ch) == U_UNASSIGNED) { + return uw_Basis_False; + } + + } else { + return uw_Basis_False; + } + + return uw_Basis_True; +} + uw_Basis_char uw_Basis_chr(uw_context ctx, uw_Basis_int n) { (void)ctx; - return (uw_Basis_char)n; + uw_Basis_char ch = (uw_Basis_char)n; + + if (UCHAR_MIN_VALUE <= ch && UCHAR_MAX_VALUE > ch) { + + if (U8_LENGTH(ch) == 0) { + uw_error(ctx, FATAL, "The integer %lld cannot be converted to a char", n); + } + + if (u_charType(ch) == U_UNASSIGNED) { + uw_error(ctx, FATAL, "The integer %lld is not a valid char codepoint", n); + } + + } else { + uw_error(ctx, FATAL, "Integer %lld out of range of unicode chars", n); + } + + return ch; } uw_Basis_string uw_Basis_currentUrl(uw_context ctx) { -- cgit v1.2.3 From 28d130c8c3c2ef9cd229d09afe14fbcbcb954223 Mon Sep 17 00:00:00 2001 From: fab Date: Wed, 9 Jan 2019 22:34:53 +0000 Subject: urlifyChar needs to be added to .h file as well --- include/urweb/urweb_cpp.h | 
1 + 1 file changed, 1 insertion(+) (limited to 'include/urweb') diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h index 25f97fb3..25f26e1b 100644 --- a/include/urweb/urweb_cpp.h +++ b/include/urweb/urweb_cpp.h @@ -138,6 +138,7 @@ char *uw_Basis_urlifySource(struct uw_context *, uw_Basis_source); uw_unit uw_Basis_urlifyInt_w(struct uw_context *, uw_Basis_int); uw_unit uw_Basis_urlifyFloat_w(struct uw_context *, uw_Basis_float); +uw_unit uw_Basis_urlifyChar_w(struct uw_context *, uw_Basis_char); uw_unit uw_Basis_urlifyString_w(struct uw_context *, uw_Basis_string); uw_unit uw_Basis_urlifyBool_w(struct uw_context *, uw_Basis_bool); uw_unit uw_Basis_urlifyTime_w(struct uw_context *, uw_Basis_time); -- cgit v1.2.3 From 87d2eab53f8e9f81cc459429675123c9ff36f41e Mon Sep 17 00:00:00 2001 From: Adam Chlipala Date: Mon, 21 Jan 2019 18:09:59 -0500 Subject: Basis.textOfBlob; try creating filecache directory if it doesn't exist --- include/urweb/urweb_cpp.h | 1 + lib/ur/basis.urs | 2 ++ src/c/urweb.c | 16 +++++++++++++++- src/cjr_print.sml | 24 +++++++++++++++++++++++- 4 files changed, 41 insertions(+), 2 deletions(-) (limited to 'include/urweb') diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h index 25f97fb3..67312015 100644 --- a/include/urweb/urweb_cpp.h +++ b/include/urweb/urweb_cpp.h @@ -262,6 +262,7 @@ uw_Basis_string uw_Basis_fileMimeType(struct uw_context *, uw_Basis_file); uw_Basis_blob uw_Basis_fileData(struct uw_context *, uw_Basis_file); uw_Basis_int uw_Basis_blobSize(struct uw_context *, uw_Basis_blob); uw_Basis_blob uw_Basis_textBlob(struct uw_context *, uw_Basis_string); +uw_Basis_string uw_Basis_textOfBlob(struct uw_context *, uw_Basis_blob); uw_Basis_string uw_Basis_postType(struct uw_context *, uw_Basis_postBody); uw_Basis_string uw_Basis_postData(struct uw_context *, uw_Basis_postBody); diff --git a/lib/ur/basis.urs b/lib/ur/basis.urs index c893e65d..be13c684 100644 --- a/lib/ur/basis.urs +++ b/lib/ur/basis.urs @@ -1019,6 
+1019,8 @@ val checkMime : string -> option mimeType val returnBlob : t ::: Type -> blob -> mimeType -> transaction t val blobSize : blob -> int val textBlob : string -> blob +val textOfBlob : blob -> option string +(* Returns [Some] exactly when the blob contains no zero bytes. *) type postBody val postType : postBody -> string diff --git a/src/c/urweb.c b/src/c/urweb.c index ae2fc0a8..c8cfb0c6 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -4075,6 +4075,20 @@ uw_Basis_blob uw_Basis_textBlob(uw_context ctx, uw_Basis_string s) { return b; } +uw_Basis_string uw_Basis_textOfBlob(uw_context ctx, uw_Basis_blob b) { + size_t i; + uw_Basis_string r; + + for (i = 0; i < b.size; ++i) + if (b.data[i] == 0) + return NULL; + + r = uw_malloc(ctx, b.size + 1); + memcpy(r, b.data, b.size); + r[b.size] = 0; + return r; +} + uw_Basis_blob uw_Basis_fileData(uw_context ctx, uw_Basis_file f) { (void)ctx; return f.data; @@ -5207,7 +5221,7 @@ uw_unit uw_Basis_cache_file(uw_context ctx, uw_Basis_blob contents) { fd = mkstemp(tempfile); if (fd < 0) - uw_error(ctx, FATAL, "Error creating temporary file for cache"); + uw_error(ctx, FATAL, "Error creating temporary file %s for cache", tempfile); while (written_so_far < contents.size) { ssize_t written_just_now = write(fd, contents.data + written_so_far, contents.size - written_so_far); diff --git a/src/cjr_print.sml b/src/cjr_print.sml index 31653a74..09cd9c7f 100644 --- a/src/cjr_print.sml +++ b/src/cjr_print.sml @@ -3391,6 +3391,14 @@ fun p_file env (ds, ps) = newline, string "#include ", newline, + (case Settings.getFileCache () of + NONE => box [] + | SOME _ => box [string "#include ", + newline, + string "#include ", + newline, + string "#include ", + newline]), if hasDb then box [string ("#include <" ^ #header (Settings.currentDbms ()) ^ ">"), newline] @@ -3655,7 +3663,21 @@ fun p_file env (ds, ps) = newline, string "static void uw_initializer(uw_context ctx) {", newline, - box [string "uw_begin_initializing(ctx);", + box [(case 
Settings.getFileCache () of + NONE => box [] + | SOME dir => box [newline, + string "struct stat st = {0};", + newline, + newline, + string "if (stat(\"", + string (Prim.toCString dir), + string "\", &st) == -1)", + newline, + box [string "mkdir(\"", + string (Prim.toCString dir), + string "\", 0700);", + newline]]), + string "uw_begin_initializing(ctx);", newline, p_list_sep newline (fn x => x) (rev (!global_initializers)), string "uw_end_initializing(ctx);", -- cgit v1.2.3 From 3f119f5c0a5f210ed442841dfed3ae98786004e9 Mon Sep 17 00:00:00 2001 From: Adam Chlipala Date: Sat, 23 Mar 2019 20:16:15 -0400 Subject: Supporting 'char' arguments to handlers called from client code --- include/urweb/urweb_cpp.h | 1 + src/c/urweb.c | 17 +++++++++++++++++ src/settings.sml | 2 ++ 3 files changed, 20 insertions(+) (limited to 'include/urweb') diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h index 18b5f583..dcf67fef 100644 --- a/include/urweb/urweb_cpp.h +++ b/include/urweb/urweb_cpp.h @@ -149,6 +149,7 @@ uw_Basis_unit uw_Basis_unurlifyUnit(struct uw_context * ctx, char **s); uw_Basis_int uw_Basis_unurlifyInt(struct uw_context *, char **); uw_Basis_float uw_Basis_unurlifyFloat(struct uw_context *, char **); uw_Basis_string uw_Basis_unurlifyString(struct uw_context *, char **); +uw_Basis_char uw_Basis_unurlifyChar(struct uw_context *, char **); uw_Basis_string uw_Basis_unurlifyString_fromClient(struct uw_context *, char **); uw_Basis_bool uw_Basis_unurlifyBool(struct uw_context *, char **); uw_Basis_time uw_Basis_unurlifyTime(struct uw_context *, char **); diff --git a/src/c/urweb.c b/src/c/urweb.c index 58f7884d..4d9e8630 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -2267,6 +2267,23 @@ uw_Basis_string uw_Basis_unurlifyString(uw_context ctx, char **s) { return r; } +uw_Basis_char uw_Basis_unurlifyChar(uw_context ctx, char **s) { + char *new_s = uw_unurlify_advance(*s); + char *r; + int len; + + len = strlen(*s); + uw_check_heap(ctx, len + 1); + + r = 
ctx->heap.front; + ctx->heap.front = uw_unurlifyString_to(0, ctx, ctx->heap.front, *s); + *s = new_s; + if (strlen(r) == 1) + return r[0]; + else + uw_error(ctx, FATAL, "Unurlified character is multiple characters long"); +} + uw_Basis_unit uw_Basis_unurlifyUnit(uw_context ctx, char **s) { (void)ctx; *s = uw_unurlify_advance(*s); diff --git a/src/settings.sml b/src/settings.sml index abb26f72..edc03d4c 100644 --- a/src/settings.sml +++ b/src/settings.sml @@ -117,6 +117,7 @@ fun basis x = S.addList (S.empty, map (fn x : string => ("Basis", x)) x) val clientToServerBase = basis ["int", "float", "string", + "char", "time", "file", "unit", @@ -277,6 +278,7 @@ val jsFuncsBase = basisM [("alert", "alert"), ("urlifyFloat", "ts"), ("urlifyTime", "ts"), ("urlifyString", "uf"), + ("urlifyChar", "uf"), ("urlifyBool", "ub"), ("recv", "rv"), ("strcat", "cat"), -- cgit v1.2.3 From 8728f397bee2b567611dcd7a7c359c7e92159c1c Mon Sep 17 00:00:00 2001 From: Adam Chlipala Date: Wed, 25 Sep 2019 19:54:59 -0400 Subject: Unicode escapes in JSON --- include/urweb/urweb_cpp.h | 1 + lib/ur/basis.urs | 1 + lib/ur/json.ur | 29 +++++++++++++++++++++++++++++ src/c/urweb.c | 12 ++++++++++++ 4 files changed, 43 insertions(+) (limited to 'include/urweb') diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h index dcf67fef..e4ad6e61 100644 --- a/include/urweb/urweb_cpp.h +++ b/include/urweb/urweb_cpp.h @@ -166,6 +166,7 @@ uw_Basis_string uw_Basis_strchr(struct uw_context *, const char *, uw_Basis_char uw_Basis_int uw_Basis_strcspn(struct uw_context *, const char *, const char *); uw_Basis_string uw_Basis_substring(struct uw_context *, const char *, uw_Basis_int, uw_Basis_int); uw_Basis_string uw_Basis_str1(struct uw_context *, uw_Basis_char); +uw_Basis_string uw_Basis_ofUnicode(struct uw_context *, uw_Basis_int); uw_Basis_string uw_strdup(struct uw_context *, const char *); uw_Basis_string uw_maybe_strdup(struct uw_context *, const char *); diff --git a/lib/ur/basis.urs 
b/lib/ur/basis.urs index 2a98bf6f..d29bf6e6 100644 --- a/lib/ur/basis.urs +++ b/lib/ur/basis.urs @@ -95,6 +95,7 @@ val strsindex : string -> string -> option int val strcspn : string -> string -> int val substring : string -> int -> int -> string val str1 : char -> string +val ofUnicode : int -> string class show val show : t ::: Type -> show t -> t -> string diff --git a/lib/ur/json.ur b/lib/ur/json.ur index 05406739..70f0c797 100644 --- a/lib/ur/json.ur +++ b/lib/ur/json.ur @@ -59,6 +59,17 @@ fun escape s = "\"" ^ esc s end +fun unhex ch = + if Char.isDigit ch then + Char.toInt ch - Char.toInt #"0" + else if Char.isXdigit ch then + if Char.isUpper ch then + 10 + (Char.toInt ch - Char.toInt #"A") + else + 10 + (Char.toInt ch - Char.toInt #"a") + else + error Invalid hexadecimal digit "{[ch]}" + fun unescape s = let val len = String.length s @@ -75,6 +86,11 @@ fun unescape s = | #"\\" => if i+1 >= len then error JSON unescape: Bad escape sequence: {[s]} + else if String.sub s (i + 1) = #"u" then + if i+5 >= len then + error JSON unescape: Bad escape sequence: {[s]} + else + findEnd (i+6) else findEnd (i+2) | _ => findEnd (i+1) @@ -93,6 +109,19 @@ fun unescape s = #"\\" => if i+1 >= len then error JSON unescape: Bad escape sequence: {[s]} + else if String.sub s (i+1) = #"u" then + if i+5 >= len then + error JSON unescape: Unicode ends early + else + let + val n = + unhex (String.sub s (i+2)) * (256*16) + + unhex (String.sub s (i+3)) * 256 + + unhex (String.sub s (i+4)) * 16 + + unhex (String.sub s (i+5)) + in + ofUnicode n ^ unesc (i+6) + end else (case String.sub s (i+1) of #"n" => "\n" diff --git a/src/c/urweb.c b/src/c/urweb.c index af929269..8c445f39 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -2724,6 +2724,18 @@ uw_Basis_string uw_Basis_str1(uw_context ctx, uw_Basis_char ch) { return r; } +uw_Basis_string uw_Basis_ofUnicode(uw_context ctx, uw_Basis_int n) { + UChar buf16[] = {n}; + uw_Basis_string out = uw_malloc(ctx, 3); + int32_t outLen; + UErrorCode 
pErrorCode = 0; + + if (u_strToUTF8(out, 3, &outLen, buf16, 1, &pErrorCode) == NULL || outLen == 0) + uw_error(ctx, FATAL, "Bad Unicode string to unescape (error %s)", u_errorName(pErrorCode)); + + return out; +} + uw_Basis_string uw_strdup(uw_context ctx, uw_Basis_string s1) { int len = strlen(s1) + 1; char *s; -- cgit v1.2.3