summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar fab <fabrice.leal.ch@gmail.com>2018-11-03 20:09:20 +0000
committerGravatar fab <fabrice.leal.ch@gmail.com>2018-11-17 21:42:00 +0000
commitc2a217f9121dd865122bc6150c53e77bd662050d (patch)
tree6e0fe17e3c7f590b547838b4d465428eaed41183
parent39497fe7d80a6765b131b0a62ab48db8bd47a8e9 (diff)
utf-8 aware functions for basis. unit-testing.
-rw-r--r--.travis.yml2
-rw-r--r--include/urweb/types_cpp.h3
-rw-r--r--src/c/Makefile.am2
-rw-r--r--src/c/urweb.c197
-rw-r--r--src/compiler.sml4
-rw-r--r--tests/Makefile2
-rw-r--r--tests/utf8.py449
-rw-r--r--tests/utf8.ur431
-rw-r--r--tests/utf8.urp5
9 files changed, 1024 insertions, 71 deletions
diff --git a/.travis.yml b/.travis.yml
index df4e4abc..86d731cc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -18,7 +18,7 @@ compiler:
before_install:
- export CONFIGURE_ARGS=""
- if command -v apt-get &>/dev/null; then sudo apt-get update -qq; fi
- - if command -v apt-get &>/dev/null; then sudo apt-get install -y mlton; fi
+ # ICU headers and libraries are required by the UTF-8 aware runtime; the apt package is named libicu-dev.
+ - if command -v apt-get &>/dev/null; then sudo apt-get install -y mlton libicu-dev; fi
- if command -v brew &>/dev/null; then brew update; fi
- if command -v brew &>/dev/null; then brew uninstall libtool; fi
- if command -v brew &>/dev/null; then brew install libtool; fi
diff --git a/include/urweb/types_cpp.h b/include/urweb/types_cpp.h
index 0c546d1c..c6c0dd3e 100644
--- a/include/urweb/types_cpp.h
+++ b/include/urweb/types_cpp.h
@@ -4,11 +4,12 @@
#include <time.h>
#include <unistd.h>
#include <stdint.h>
+#include <unicode/utypes.h>
typedef long long uw_Basis_int;
typedef double uw_Basis_float;
typedef char* uw_Basis_string;
-typedef char uw_Basis_char;
+typedef UChar32 uw_Basis_char;
typedef struct {
time_t seconds;
unsigned microseconds;
diff --git a/src/c/Makefile.am b/src/c/Makefile.am
index 027b1458..96c1d92f 100644
--- a/src/c/Makefile.am
+++ b/src/c/Makefile.am
@@ -11,7 +11,7 @@ AM_CFLAGS = -Wall -Wunused-parameter -Werror -Wno-format-security -Wno-deprecate
liburweb_la_LDFLAGS = $(AM_LDFLAGS) $(OPENSSL_LDFLAGS) \
-export-symbols-regex '^(client_pruner|pthread_create_big|strcmp_nullsafe|uw_.*)' \
-version-info 1:0:0
-liburweb_la_LIBADD = $(PTHREAD_LIBS) -lm $(OPENSSL_LIBS)
+liburweb_la_LIBADD = $(PTHREAD_LIBS) -lm $(OPENSSL_LIBS) -licui18n -licuuc -licudata
liburweb_http_la_LIBADD = liburweb.la
liburweb_http_la_LDFLAGS = -export-symbols-regex '^(main|uw_.*)' \
-version-info 1:0:0
diff --git a/src/c/urweb.c b/src/c/urweb.c
index 2e3e18bc..69c3da94 100644
--- a/src/c/urweb.c
+++ b/src/c/urweb.c
@@ -20,6 +20,9 @@
#include <pthread.h>
+#include <unicode/utf8.h>
+#include <unicode/uchar.h>
+
#include "types.h"
#include "uthash.h"
@@ -2421,28 +2424,34 @@ uw_unit uw_Basis_htmlifySource_w(uw_context ctx, uw_Basis_source src) {
return uw_unit_v;
}
-uw_Basis_char uw_Basis_strsub(uw_context ctx, uw_Basis_string s, uw_Basis_int n) {
+uw_Basis_char uw_Basis_strsub(uw_context ctx, uw_Basis_string s, uw_Basis_int n) {
+ uw_Basis_char c;
+ int offset = 0;
+
while (n >= 0) {
- if (*s == 0)
+
+ if (s[offset] == 0)
uw_error(ctx, FATAL, "Out-of-bounds strsub");
+ U8_NEXT(s, offset, -1, c);
+
if (n == 0)
- return *s;
+ return c;
--n;
- ++s;
}
uw_error(ctx, FATAL, "Negative strsub bound");
}
uw_Basis_string uw_Basis_strsuffix(uw_context ctx, uw_Basis_string s, uw_Basis_int n) {
+ int offset = 0;
while (n >= 0) {
- if (*s == 0 || n == 0)
- return s;
+ if (s[offset] == 0 || n == 0)
+ return s + offset;
+ U8_FWD_1(s, offset, -1);
--n;
- ++s;
}
uw_error(ctx, FATAL, "Negative strsuffix bound");
@@ -2450,40 +2459,80 @@ uw_Basis_string uw_Basis_strsuffix(uw_context ctx, uw_Basis_string s, uw_Basis_i
uw_Basis_int uw_Basis_strlen(uw_context ctx, uw_Basis_string s) {
(void)ctx;
- return strlen(s);
+ int offset = 0, iterations = 0;
+ while (s[offset] != 0) {
+ U8_FWD_1(s, offset, -1);
+ ++iterations;
+ }
+ return iterations;
}
uw_Basis_bool uw_Basis_strlenGe(uw_context ctx, uw_Basis_string s, uw_Basis_int n) {
(void)ctx;
-
+ int offset = 0;
while (n > 0) {
- if (*s == 0)
+ if (s[offset] == 0)
return uw_Basis_False;
-
+
+ U8_FWD_1(s, offset, -1);
--n;
- ++s;
}
return uw_Basis_True;
}
+/* Find the first code point equal to ch in the NUL-terminated UTF-8 string s.
+ *
+ * Returns the zero-based code-point index of the match and stores in
+ * *o_offset the byte offset at which the matched character STARTS, so that
+ * s + *o_offset points at the match (the same contract as C strchr).
+ * When ch does not occur, returns -1 and sets *o_offset to -1. */
+int aux_strchr(uw_Basis_string s, uw_Basis_char ch, int* o_offset) {
+  int u8idx = 0, offset = 0;
+  uw_Basis_char c;
+
+  while (s[offset] != 0) {
+    /* U8_NEXT advances offset past the decoded character, so remember
+     * where this character began before decoding it. */
+    int start = offset;
+
+    U8_NEXT(s, offset, -1, c);
+    if (c == ch) {
+      *o_offset = start;
+      return u8idx;
+    }
+
+    ++u8idx;
+  }
+
+  *o_offset = -1;
+  return -1;
+}
+
uw_Basis_string uw_Basis_strchr(uw_context ctx, uw_Basis_string s, uw_Basis_char ch) {
(void)ctx;
- return strchr(s, ch);
+ int offset = -1;
+ if (aux_strchr(s, ch, &offset) > -1) {
+ return s + offset;
+ }
+ return NULL;
}
uw_Basis_int uw_Basis_strcspn(uw_context ctx, uw_Basis_string s, uw_Basis_string chs) {
(void)ctx;
- return strcspn(s, chs);
+ int offset = 0, u8idx = 0, offsetChs = 0;
+ uw_Basis_char c;
+
+ while (s[offset] != 0) {
+ U8_NEXT(s, offset, -1, c);
+ if (aux_strchr(chs, c, &offsetChs) > -1) {
+ return u8idx;
+ }
+ ++u8idx;
+ }
+
+ return u8idx;
}
uw_Basis_int *uw_Basis_strindex(uw_context ctx, uw_Basis_string s, uw_Basis_char ch) {
- uw_Basis_string r = strchr(s, ch);
- if (r == NULL)
+ (void)ctx;
+ int offset = -1;
+ int r = aux_strchr(s, ch, &offset);
+ if (r == -1)
return NULL;
else {
uw_Basis_int *nr = uw_malloc(ctx, sizeof(uw_Basis_int));
- *nr = r - s;
+ *nr = r;
return nr;
}
}
@@ -2494,13 +2543,19 @@ uw_Basis_int *uw_Basis_strsindex(uw_context ctx, const char *haystack, const cha
return NULL;
else {
uw_Basis_int *nr = uw_malloc(ctx, sizeof(uw_Basis_int));
- *nr = r - haystack;
+ int src = r - haystack, offset = 0, utf8idx = 0;
+ while (offset < src) {
+ U8_FWD_1(haystack, offset, -1);
+ ++utf8idx;
+ }
+
+ *nr = utf8idx;
return nr;
}
}
uw_Basis_string uw_Basis_strcat(uw_context ctx, uw_Basis_string s1, uw_Basis_string s2) {
- int len = uw_Basis_strlen(ctx, s1) + uw_Basis_strlen(ctx, s2) + 1;
+ int len = strlen(s1) + strlen(s2) + 1;
char *s;
uw_check_heap(ctx, len);
@@ -2515,8 +2570,8 @@ uw_Basis_string uw_Basis_strcat(uw_context ctx, uw_Basis_string s1, uw_Basis_str
}
uw_Basis_string uw_Basis_substring(uw_context ctx, uw_Basis_string s, uw_Basis_int start, uw_Basis_int len) {
- size_t full_len = uw_Basis_strlen(ctx, s);
-
+ int full_len = uw_Basis_strlen(ctx, s);
+
if (start < 0)
uw_error(ctx, FATAL, "substring: Negative start index");
if (len < 0)
@@ -2524,32 +2579,41 @@ uw_Basis_string uw_Basis_substring(uw_context ctx, uw_Basis_string s, uw_Basis_i
if (start + len > full_len)
uw_error(ctx, FATAL, "substring: Start index plus length is too large");
- if (start + len == full_len)
- return &s[start];
- else {
- uw_Basis_string r = uw_malloc(ctx, len+1);
- memcpy(r, s+start, len);
- r[len] = 0;
+ int offset = 0;
+ U8_FWD_N(s, offset, -1, start);
+
+ if (start + len == full_len) {
+ return s + offset;
+ } else {
+ int end = offset;
+ U8_FWD_N(s, end, -1, len);
+
+ int actual_len = end - offset;
+
+ uw_Basis_string r = uw_malloc(ctx, actual_len + 1);
+ memcpy(r, s + offset, actual_len);
+ r[actual_len] = 0;
return r;
}
-
}
uw_Basis_string uw_Basis_str1(uw_context ctx, uw_Basis_char ch) {
char *r;
-
- uw_check_heap(ctx, 2);
+ int req = U8_LENGTH(ch);
+ int offset = 0;
+
+ /* U8_LENGTH yields 0 for values that are not encodable code points
+  * (surrogates, values above U+10FFFF, negative sentinels).  U8_APPEND_UNSAFE
+  * would still emit several bytes for them, overrunning the space reserved
+  * below, so refuse such input instead of writing past the checked region. */
+ if (req == 0)
+ uw_error(ctx, FATAL, "str1: Invalid Unicode code point");
+
+ /* Reserve the encoded bytes plus the terminating NUL. */
+ uw_check_heap(ctx, req + 1);
r = ctx->heap.front;
- r[0] = ch;
- r[1] = 0;
- ctx->heap.front += 2;
+ U8_APPEND_UNSAFE(r, offset, ch);
+ r[req] = 0;
- return r;
+ ctx->heap.front += req + 1;
+ return r;
}
uw_Basis_string uw_strdup(uw_context ctx, uw_Basis_string s1) {
- int len = uw_Basis_strlen(ctx, s1) + 1;
+ int len = strlen(s1) + 1;
char *s;
uw_check_heap(ctx, len);
@@ -2676,7 +2740,6 @@ uw_Basis_string uw_Basis_sqlifyString(uw_context ctx, uw_Basis_string s) {
uw_Basis_string uw_Basis_sqlifyChar(uw_context ctx, uw_Basis_char c) {
char *r, *s2;
-
uw_check_heap(ctx, 5 + uw_Estrings + strlen(uw_sqlsuffixChar));
r = s2 = ctx->heap.front;
@@ -2934,10 +2997,7 @@ uw_Basis_string uw_Basis_floatToString(uw_context ctx, uw_Basis_float n) {
}
uw_Basis_string uw_Basis_charToString(uw_context ctx, uw_Basis_char ch) {
- char *r = uw_malloc(ctx, 2);
- r[0] = ch;
- r[1] = 0;
- return r;
+ return uw_Basis_str1(ctx, ch);
}
uw_Basis_string uw_Basis_boolToString(uw_context ctx, uw_Basis_bool b) {
@@ -2997,11 +3057,12 @@ uw_Basis_char *uw_Basis_stringToChar(uw_context ctx, uw_Basis_string s) {
- uw_Basis_char *r = uw_malloc(ctx, 1);
+ /* uw_Basis_char is a UChar32 now, so allocate a whole character rather
+  * than a single byte; storing through r would otherwise overflow. */
+ uw_Basis_char *r = uw_malloc(ctx, sizeof(uw_Basis_char));
r[0] = 0;
return r;
- } else if (s[1] != 0)
+ } else if (uw_Basis_strlenGe(ctx, s, 2) == uw_Basis_True)
return NULL;
else {
- uw_Basis_char *r = uw_malloc(ctx, 1);
- r[0] = s[0];
+ uw_Basis_char *r = uw_malloc(ctx, sizeof(uw_Basis_char));
+ int offset = 0;
+ /* Decode the single (possibly multi-byte) character into *r. */
+ U8_NEXT(s, offset, -1, *r);
return r;
}
}
@@ -3126,10 +3187,14 @@ uw_Basis_float uw_Basis_stringToFloat_error(uw_context ctx, uw_Basis_string s) {
uw_Basis_char uw_Basis_stringToChar_error(uw_context ctx, uw_Basis_string s) {
if (s[0] == 0)
return 0;
- else if (s[1] != 0)
+ else if (uw_Basis_strlenGe(ctx, s, 2) == uw_Basis_True)
uw_error(ctx, FATAL, "Can't parse char: %s", uw_Basis_htmlifyString(ctx, s));
- else
- return s[0];
+ else {
+ uw_Basis_char c;
+ int offset = 0;
+ U8_NEXT(s, offset, -1, c);
+ return c;
+ }
}
uw_Basis_bool uw_Basis_stringToBool_error(uw_context ctx, uw_Basis_string s) {
@@ -4328,82 +4393,82 @@ void uw_set_global(uw_context ctx, char *name, void *data, void (*free)(void*))
uw_Basis_bool uw_Basis_isalnum(uw_context ctx, uw_Basis_char c) {
(void)ctx;
- return !!isalnum((int)c);
+ return !!u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM);
}
uw_Basis_bool uw_Basis_isalpha(uw_context ctx, uw_Basis_char c) {
(void)ctx;
- return !!isalpha((int)c);
+ return !!u_hasBinaryProperty(c, UCHAR_ALPHABETIC);
}
uw_Basis_bool uw_Basis_isblank(uw_context ctx, uw_Basis_char c) {
(void)ctx;
- return !!isblank((int)c);
+ return !!u_hasBinaryProperty(c, UCHAR_POSIX_BLANK);
}
uw_Basis_bool uw_Basis_iscntrl(uw_context ctx, uw_Basis_char c) {
(void)ctx;
- return !!iscntrl((int)c);
+ return !!(u_charType(c)==U_CONTROL_CHAR);
}
uw_Basis_bool uw_Basis_isdigit(uw_context ctx, uw_Basis_char c) {
(void)ctx;
- return !!isdigit((int)c);
+ return !!u_isdigit(c);
}
uw_Basis_bool uw_Basis_isgraph(uw_context ctx, uw_Basis_char c) {
(void)ctx;
- return !!isgraph((int)c);
+ return !!u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH);
}
uw_Basis_bool uw_Basis_islower(uw_context ctx, uw_Basis_char c) {
(void)ctx;
- return !!islower((int)c);
+ return !!u_hasBinaryProperty(c, UCHAR_LOWERCASE);
}
uw_Basis_bool uw_Basis_isprint(uw_context ctx, uw_Basis_char c) {
(void)ctx;
- return !!isprint((int)c);
+ return !!u_hasBinaryProperty(c, UCHAR_POSIX_PRINT);
}
uw_Basis_bool uw_Basis_ispunct(uw_context ctx, uw_Basis_char c) {
(void)ctx;
- return !!ispunct((int)c);
+ return !!u_ispunct(c);
}
uw_Basis_bool uw_Basis_isspace(uw_context ctx, uw_Basis_char c) {
(void)ctx;
- return !!isspace((int)c);
+ return !!u_hasBinaryProperty(c, UCHAR_WHITE_SPACE);
}
uw_Basis_bool uw_Basis_isupper(uw_context ctx, uw_Basis_char c) {
(void)ctx;
- return !!isupper((int)c);
+ return !!u_hasBinaryProperty(c, UCHAR_UPPERCASE);
}
uw_Basis_bool uw_Basis_isxdigit(uw_context ctx, uw_Basis_char c) {
(void)ctx;
- return !!isxdigit((int)c);
+ return !!u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT);
}
uw_Basis_char uw_Basis_tolower(uw_context ctx, uw_Basis_char c) {
(void)ctx;
- return tolower((int)c);
+ return u_tolower(c);
}
uw_Basis_char uw_Basis_toupper(uw_context ctx, uw_Basis_char c) {
(void)ctx;
- return toupper((int)c);
+ return u_toupper(c);
}
uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c) {
(void)ctx;
- return (unsigned char)c;
+ return (uw_Basis_int)c;
}
uw_Basis_char uw_Basis_chr(uw_context ctx, uw_Basis_int n) {
(void)ctx;
- return n;
+ return (uw_Basis_char)n;
}
uw_Basis_string uw_Basis_currentUrl(uw_context ctx) {
@@ -4657,7 +4722,7 @@ uw_Basis_string uw_Basis_atom(uw_context ctx, uw_Basis_string s) {
for (p = s; *p; ++p) {
char c = *p;
- if (!isalnum((int)c) && c != '+' && c != '-' && c != '.' && c != '%' && c != '#')
+ /* Reject every non-ASCII byte, then apply the ASCII whitelist.  The
+  * unsigned char cast keeps isalnum() defined for all byte values. */
+ if (!U8_IS_SINGLE(c) || (!isalnum((unsigned char)c) && c != '+' && c != '-' && c != '.' && c != '%' && c != '#'))
uw_error(ctx, FATAL, "Disallowed character in CSS atom");
}
@@ -4669,7 +4734,7 @@ uw_Basis_string uw_Basis_css_url(uw_context ctx, uw_Basis_string s) {
for (p = s; *p; ++p) {
char c = *p;
- if (!isalnum((int)c) && c != ':' && c != '/' && c != '.' && c != '_' && c != '+'
- && c != '-' && c != '%' && c != '?' && c != '&' && c != '=' && c != '#')
+ /* Reject every non-ASCII byte, then apply the ASCII whitelist.  The
+  * unsigned char cast keeps isalnum() defined for all byte values. */
+ if (!U8_IS_SINGLE(c) || (!isalnum((unsigned char)c) && c != ':' && c != '/' && c != '.' && c != '_' && c != '+'
+ && c != '-' && c != '%' && c != '?' && c != '&' && c != '=' && c != '#'))
uw_error(ctx, FATAL, "Disallowed character in CSS URL");
}
@@ -4688,7 +4753,7 @@ uw_Basis_string uw_Basis_property(uw_context ctx, uw_Basis_string s) {
for (p = s; *p; ++p) {
char c = *p;
- if (!islower((int)c) && !isdigit((int)c) && c != '_' && c != '-')
+ /* Reject every non-ASCII byte, then apply the ASCII whitelist.  The
+  * unsigned char casts keep the <ctype.h> calls defined for all bytes. */
+ if (!U8_IS_SINGLE(c) || (!islower((unsigned char)c) && !isdigit((unsigned char)c) && c != '_' && c != '-'))
uw_error(ctx, FATAL, "Disallowed character in CSS property");
}
@@ -5064,7 +5129,7 @@ void uw_Sqlcache_flush(uw_context ctx, uw_Sqlcache_Cache *cache, char **keys) {
pthread_rwlock_unlock(&cache->lockIn);
}
-int strcmp_nullsafe(const char *str1, const char *str2) {
+int strcmp_nullsafe(const char *str1, const char *str2) {
if (str1)
return strcmp(str1, str2);
else
@@ -5073,7 +5138,7 @@ int strcmp_nullsafe(const char *str1, const char *str2) {
static int is_valid_hash(uw_Basis_string hash) {
for (; *hash; ++hash)
- if (!isxdigit(*hash))
+ /* A hash must consist solely of ASCII hex digits: fail on any non-ASCII
+  * byte or any ASCII byte that is not a hex digit.  The unsigned char cast
+  * keeps isxdigit() defined for every byte value. */
+ if (!U8_IS_SINGLE(*hash) || !isxdigit((unsigned char)*hash))
return 0;
return 1;
diff --git a/src/compiler.sml b/src/compiler.sml
index f724bf56..9ee88c9b 100644
--- a/src/compiler.sml
+++ b/src/compiler.sml
@@ -1585,9 +1585,9 @@ fun compileC {cname, oname, ename, libs, profile, debug, linker, link = link'} =
val proto = Settings.currentProtocol ()
val lib = if Settings.getBootLinking () then
- !Settings.configLib ^ "/" ^ #linkStatic proto ^ " " ^ !Settings.configLib ^ "/liburweb.a"
+ !Settings.configLib ^ "/" ^ #linkStatic proto ^ " " ^ !Settings.configLib ^ "/liburweb.a -licui18n -licuuc -licudata"
else if Settings.getStaticLinking () then
- " -static " ^ !Settings.configLib ^ "/" ^ #linkStatic proto ^ " " ^ !Settings.configLib ^ "/liburweb.a"
+ " -static " ^ !Settings.configLib ^ "/" ^ #linkStatic proto ^ " " ^ !Settings.configLib ^ "/liburweb.a -licui18n -licuuc -licudata"
else
"-L" ^ !Settings.configLib ^ " " ^ #linkDynamic proto ^ " -lurweb"
diff --git a/tests/Makefile b/tests/Makefile
index ecf5557b..03e37e4b 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -28,3 +28,5 @@ simple::
./driver.sh fact
./driver.sh filter
./driver.sh jsbspace
+ ./driver.sh utf8
+
diff --git a/tests/utf8.py b/tests/utf8.py
new file mode 100644
index 00000000..ff9b737a
--- /dev/null
+++ b/tests/utf8.py
@@ -0,0 +1,449 @@
+import unittest
+import base
+
+class Suite(base.Base):
+ def test_1(self):
+ """Test case: substring (1)"""
+ self.start('Utf8/substrings')
+
+ pre = self.xpath('pre[1]')
+ self.assertEqual('abc', pre.text)
+
+ pre = self.xpath('pre[2]')
+ self.assertEqual('bc', pre.text)
+
+ pre = self.xpath('pre[3]')
+ self.assertEqual('c', pre.text)
+
+ pre = self.xpath('pre[4]')
+ self.assertEqual('ábó', pre.text)
+
+ pre = self.xpath('pre[5]')
+ self.assertEqual('bó', pre.text)
+
+ pre = self.xpath('pre[6]')
+ self.assertEqual('ó', pre.text)
+
+ pre = self.xpath('pre[7]')
+ self.assertEqual('çãó', pre.text)
+
+ pre = self.xpath('pre[8]')
+ self.assertEqual('ãó', pre.text)
+
+ pre = self.xpath('pre[9]')
+ self.assertEqual('ó', pre.text)
+
+ pre = self.xpath('pre[10]')
+ self.assertEqual('', pre.text)
+
+ pre = self.xpath('pre[11]')
+ self.assertEqual('', pre.text)
+
+
+ def test_2(self):
+ """Test case: strlen (2)"""
+ self.start('Utf8/strlens')
+
+ pre = self.xpath('pre[1]')
+ self.assertEqual('3', pre.text)
+
+ pre = self.xpath('pre[2]')
+ self.assertEqual('3', pre.text)
+
+ pre = self.xpath('pre[3]')
+ self.assertEqual('3', pre.text)
+
+ pre = self.xpath('pre[4]')
+ self.assertEqual('3', pre.text)
+
+ pre = self.xpath('pre[5]')
+ self.assertEqual('1', pre.text)
+
+ pre = self.xpath('pre[6]')
+ self.assertEqual('1', pre.text)
+
+ pre = self.xpath('pre[7]')
+ self.assertEqual('0', pre.text)
+
+ pre = self.xpath('pre[8]')
+ self.assertEqual('1', pre.text)
+
+ pre = self.xpath('pre[9]')
+ self.assertEqual('1', pre.text)
+
+ pre = self.xpath('pre[10]')
+ self.assertEqual('1', pre.text)
+
+ pre = self.xpath('pre[11]')
+ self.assertEqual('6', pre.text)
+
+ pre = self.xpath('pre[12]')
+ self.assertEqual('2', pre.text)
+
+ pre = self.xpath('pre[13]')
+ self.assertEqual('14', pre.text)
+
+
+ def test_3(self):
+ """Test case: strlenGe (3)"""
+ self.start('Utf8/strlenGens')
+
+ pre = self.xpath('pre[1]')
+ self.assertEqual('False', pre.text)
+
+ pre = self.xpath('pre[2]')
+ self.assertEqual('True', pre.text)
+
+ pre = self.xpath('pre[3]')
+ self.assertEqual('False', pre.text)
+
+ pre = self.xpath('pre[4]')
+ self.assertEqual('True', pre.text)
+
+ pre = self.xpath('pre[5]')
+ self.assertEqual('True', pre.text)
+
+ pre = self.xpath('pre[6]')
+ self.assertEqual('False', pre.text)
+
+ pre = self.xpath('pre[7]')
+ self.assertEqual('True', pre.text)
+
+ pre = self.xpath('pre[8]')
+ self.assertEqual('True', pre.text)
+
+ def test_4(self):
+ """Test case: strcat (4)"""
+ self.start('Utf8/strcats')
+
+ pre = self.xpath('pre[1]')
+ self.assertEqual('', pre.text)
+
+ pre = self.xpath('pre[2]')
+ self.assertEqual('0', pre.text)
+
+ pre = self.xpath('pre[3]')
+ self.assertEqual('aabb', pre.text)
+
+ pre = self.xpath('pre[4]')
+ self.assertEqual('4', pre.text)
+
+ pre = self.xpath('pre[5]')
+ self.assertEqual('bb', pre.text)
+
+ pre = self.xpath('pre[6]')
+ self.assertEqual('2', pre.text)
+
+ pre = self.xpath('pre[7]')
+ self.assertEqual('aa', pre.text)
+
+ pre = self.xpath('pre[8]')
+ self.assertEqual('2', pre.text)
+
+ pre = self.xpath('pre[9]')
+ self.assertEqual('ààáá', pre.text)
+
+ pre = self.xpath('pre[10]')
+ self.assertEqual('4', pre.text)
+
+ pre = self.xpath('pre[11]')
+ self.assertEqual('áá', pre.text)
+
+ pre = self.xpath('pre[12]')
+ self.assertEqual('2', pre.text)
+
+ pre = self.xpath('pre[13]')
+ self.assertEqual('àà', pre.text)
+
+ pre = self.xpath('pre[14]')
+ self.assertEqual('2', pre.text)
+
+ def test_5(self):
+ """Test case: strsub (5)"""
+ self.start('Utf8/strsubs')
+
+ pre = self.xpath('pre[1]')
+ self.assertEqual('a', pre.text)
+
+ pre = self.xpath('pre[2]')
+ self.assertEqual('b', pre.text)
+
+ pre = self.xpath('pre[3]')
+ self.assertEqual('à', pre.text)
+
+ pre = self.xpath('pre[4]')
+ self.assertEqual('ç', pre.text)
+
+ def test_6(self):
+ """Test case: strsuffix (6)"""
+ self.start('Utf8/strsuffixs')
+
+ pre = self.xpath('pre[1]')
+ self.assertEqual('abàç', pre.text)
+
+ pre = self.xpath('pre[2]')
+ self.assertEqual('bàç', pre.text)
+
+ pre = self.xpath('pre[3]')
+ self.assertEqual('àç', pre.text)
+
+ pre = self.xpath('pre[4]')
+ self.assertEqual('ç', pre.text)
+
+    def test_7(self):
+        """Test case: strchr (7)
+
+        strchr yields the suffix of the string that STARTS at the first
+        occurrence of the character (the C strchr contract), or None when
+        the character does not occur.
+        """
+        self.start('Utf8/strchrs')
+
+        # 'c' does not occur in "abàç" (only 'ç' does)
+        pre = self.xpath('pre[1]')
+        self.assertEqual('None', pre.text)
+
+        pre = self.xpath('pre[2]')
+        self.assertEqual('Some "abàç"', pre.text)
+
+        pre = self.xpath('pre[3]')
+        self.assertEqual('Some "bàç"', pre.text)
+
+        pre = self.xpath('pre[4]')
+        self.assertEqual('Some "àç"', pre.text)
+
+        pre = self.xpath('pre[5]')
+        self.assertEqual('Some "ç"', pre.text)
+
+ def test_8(self):
+ """Test case: strindex (8)"""
+ self.start('Utf8/strindexs')
+
+ pre = self.xpath('pre[1]')
+ self.assertEqual('None', pre.text)
+
+ pre = self.xpath('pre[2]')
+ self.assertEqual('Some 0', pre.text)
+
+ pre = self.xpath('pre[3]')
+ self.assertEqual('Some 1', pre.text)
+
+ pre = self.xpath('pre[4]')
+ self.assertEqual('Some 2', pre.text)
+
+ pre = self.xpath('pre[5]')
+ self.assertEqual('Some 3', pre.text)
+
+ def test_9(self):
+ """Test case: strsindex (9)"""
+ self.start('Utf8/strsindexs')
+
+ pre = self.xpath('pre[1]')
+ # an empty needle matches at index 0, mirroring the C strstr function
+ self.assertEqual('Some 0', pre.text)
+
+ pre = self.xpath('pre[2]')
+ self.assertEqual('Some 0', pre.text)
+
+ pre = self.xpath('pre[3]')
+ self.assertEqual('None', pre.text)
+
+ pre = self.xpath('pre[4]')
+ self.assertEqual('Some 1', pre.text)
+
+ pre = self.xpath('pre[5]')
+ self.assertEqual('None', pre.text)
+
+ pre = self.xpath('pre[6]')
+ self.assertEqual('Some 2', pre.text)
+
+ pre = self.xpath('pre[7]')
+ self.assertEqual('None', pre.text)
+
+ pre = self.xpath('pre[8]')
+ self.assertEqual('None', pre.text)
+
+ pre = self.xpath('pre[9]')
+ self.assertEqual('Some 3', pre.text)
+
+ def test_10(self):
+ """Test case: strcspn (10)"""
+ self.start('Utf8/strcspns')
+
+ pre = self.xpath('pre[1]')
+ self.assertEqual('4', pre.text)
+
+ pre = self.xpath('pre[2]')
+ self.assertEqual('0', pre.text)
+
+ pre = self.xpath('pre[3]')
+ self.assertEqual('0', pre.text)
+
+ pre = self.xpath('pre[4]')
+ self.assertEqual('1', pre.text)
+
+ pre = self.xpath('pre[5]')
+ self.assertEqual('2', pre.text)
+
+ pre = self.xpath('pre[6]')
+ self.assertEqual('3', pre.text)
+
+ def test_11(self):
+ """Test case: str1 (11)"""
+ self.start('Utf8/str1s')
+
+ pre = self.xpath('pre[1]')
+ self.assertEqual('a', pre.text)
+
+ pre = self.xpath('pre[2]')
+ self.assertEqual('à', pre.text)
+
+ pre = self.xpath('pre[3]')
+ self.assertEqual('á', pre.text)
+
+ def test_12(self):
+ """Test case: isalnum (12)"""
+ self.start('Utf8/isalnums')
+
+ for idx in range(1, 9):
+ pre = self.xpath('pre[' + str(idx) + ']')
+ self.assertEqual('True', pre.text, 'Failed isalnum: assert ' + str(idx))
+
+ def test_13(self):
+ """Test case: isalpha (13)"""
+ self.start('Utf8/isalphas')
+
+ for idx in range(1, 9):
+ pre = self.xpath('pre[' + str(idx) + ']')
+ self.assertEqual('True', pre.text, 'Failed isalpha: assert ' + str(idx))
+
+ def test_14(self):
+ """Test case: isblank (14)"""
+ self.start('Utf8/isblanks')
+
+ for idx in range(1, 11):
+ pre = self.xpath('pre[' + str(idx) + ']')
+ self.assertEqual('True', pre.text, 'Failed isblank: assert ' + str(idx))
+
+ def test_15(self):
+ """Test case: iscntrl (15)"""
+ self.start('Utf8/iscntrls')
+
+ for idx in range(1, 11):
+ pre = self.xpath('pre[' + str(idx) + ']')
+ self.assertEqual('True', pre.text, 'Failed iscntrl: assert ' + str(idx))
+
+ def test_16(self):
+ """Test case: isdigit (16)"""
+ self.start('Utf8/isdigits')
+
+ for idx in range(1, 11):
+ pre = self.xpath('pre[' + str(idx) + ']')
+ self.assertEqual('True', pre.text, 'Failed isdigit: assert ' + str(idx))
+
+
+ def test_17(self):
+ """Test case: isgraph (17)"""
+ self.start('Utf8/isgraphs')
+
+ for idx in range(1, 11):
+ pre = self.xpath('pre[' + str(idx) + ']')
+ self.assertEqual('True', pre.text, 'Failed isgraph: assert ' + str(idx))
+
+ def test_18(self):
+ """Test case: islower (18)"""
+ self.start('Utf8/islowers')
+
+ for idx in range(1, 11):
+ pre = self.xpath('pre[' + str(idx) + ']')
+ self.assertEqual('True', pre.text, 'Failed islower: assert ' + str(idx))
+
+ def test_19(self):
+ """Test case: isprint (19)"""
+ self.start('Utf8/isprints')
+
+ for idx in range(1, 11):
+ pre = self.xpath('pre[' + str(idx) + ']')
+ self.assertEqual('True', pre.text, 'Failed isprint: assert ' + str(idx))
+
+ def test_20(self):
+ """Test case: ispunct (20)"""
+ self.start('Utf8/ispuncts')
+
+ for idx in range(1, 11):
+ pre = self.xpath('pre[' + str(idx) + ']')
+ self.assertEqual('True', pre.text, 'Failed ispunct: assert ' + str(idx))
+
+ def test_21(self):
+ """Test case: isspace (21)"""
+ self.start('Utf8/isspaces')
+
+ for idx in range(1, 11):
+ pre = self.xpath('pre[' + str(idx) + ']')
+ self.assertEqual('True', pre.text, 'Failed isspace: assert ' + str(idx))
+
+ def test_22(self):
+ """Test case: isupper (22)"""
+ self.start('Utf8/isuppers')
+
+ for idx in range(1, 11):
+ pre = self.xpath('pre[' + str(idx) + ']')
+ self.assertEqual('True', pre.text, 'Failed isupper: assert ' + str(idx))
+
+ def test_23(self):
+ """Test case: isxdigit (23)"""
+ self.start('Utf8/isxdigits')
+
+ for idx in range(1, 11):
+ pre = self.xpath('pre[' + str(idx) + ']')
+ self.assertEqual('True', pre.text, 'Failed isxdigit: assert ' + str(idx))
+
+ def test_24(self):
+ """Test case: toupper (24)"""
+ self.start('Utf8/touppers')
+
+ for idx in range(1, 6):
+ pre = self.xpath('pre[' + str(idx) + ']')
+ self.assertEqual('True', pre.text, 'Failed toupper: assert ' + str(idx))
+
+ def test_25(self):
+ """Test case: ord (25)"""
+ self.start('Utf8/ord_and_chrs')
+
+ for idx in range(1, 8):
+ pre = self.xpath('pre[' + str(idx) + ']')
+ self.assertEqual('True', pre.text, 'Failed ord: assert ' + str(idx))
+
+ def test_26 (self):
+ """Test case: test_db (26) """
+ self.start('Utf8/test_db')
+
+ pre = self.xpath('pre[1]')
+ self.assertEqual('abc', pre.text)
+
+ pre = self.xpath('pre[2]')
+ self.assertEqual('3', pre.text)
+
+ pre = self.xpath('pre[3]')
+ self.assertEqual('çãó', pre.text)
+
+ pre = self.xpath('pre[4]')
+ self.assertEqual('3', pre.text)
+
+ pre = self.xpath('pre[5]')
+ self.assertEqual('が', pre.text)
+
+ pre = self.xpath('pre[6]')
+ self.assertEqual('1', pre.text)
+
+ pre = self.xpath('pre[7]')
+ self.assertEqual('漢', pre.text)
+
+ pre = self.xpath('pre[8]')
+ self.assertEqual('1', pre.text)
+
+ pre = self.xpath('pre[9]')
+ self.assertEqual('カ', pre.text)
+
+ pre = self.xpath('pre[10]')
+ self.assertEqual('1', pre.text)
+
+ pre = self.xpath('pre[11]')
+ self.assertEqual('وظيفية', pre.text)
+
+ pre = self.xpath('pre[12]')
+ self.assertEqual('6', pre.text)
diff --git a/tests/utf8.ur b/tests/utf8.ur
new file mode 100644
index 00000000..0dedc726
--- /dev/null
+++ b/tests/utf8.ur
@@ -0,0 +1,431 @@
+fun substrings () : transaction page = return <xml>
+ <body>
+ <pre>{[substring "abc" 0 3]}</pre>
+ <pre>{[substring "abc" 1 2]}</pre>
+ <pre>{[substring "abc" 2 1]}</pre>
+ <pre>{[substring "ábó" 0 3]}</pre>
+ <pre>{[substring "ábó" 1 2]}</pre>
+ <pre>{[substring "ábó" 2 1]}</pre>
+ <pre>{[substring "çãó" 0 3]}</pre>
+ <pre>{[substring "çãó" 1 2]}</pre>
+ <pre>{[substring "çãó" 2 1]}</pre>
+ <pre>{[substring "çãó" 2 0]}</pre>
+ <pre>{[substring "" 0 0]}</pre>
+ </body>
+</xml>
+
+fun strlens () : transaction page = return <xml>
+ <body>
+ <pre>{[strlen "abc"]}</pre>
+ <pre>{[strlen "çbc"]}</pre>
+ <pre>{[strlen "çãc"]}</pre>
+ <pre>{[strlen "çãó"]}</pre>
+ <pre>{[strlen "ç"]}</pre>
+ <pre>{[strlen "c"]}</pre>
+ <pre>{[strlen ""]}</pre>
+ <pre>{[strlen "が"]}</pre>
+ <pre>{[strlen "漢"]}</pre>
+ <pre>{[strlen "カ"]}</pre>
+ <pre>{[strlen "وظيفية"]}</pre>
+ <pre>{[strlen "函數"]}</pre>
+ <pre>{[strlen "Функциональное"]}</pre>
+ </body>
+ </xml>
+
+fun strlenGens () : transaction page = return <xml>
+ <body>
+ <pre>{[strlenGe "" 1]}</pre>
+ <pre>{[strlenGe "" 0]}</pre>
+ <pre>{[strlenGe "aba" 4]}</pre>
+ <pre>{[strlenGe "aba" 3]}</pre>
+ <pre>{[strlenGe "aba" 2]}</pre>
+ <pre>{[strlenGe "áçà" 4]}</pre>
+ <pre>{[strlenGe "áçà" 3]}</pre>
+ <pre>{[strlenGe "áçà" 2]}</pre>
+
+ </body>
+ </xml>
+
+fun strcats () : transaction page =
+ let
+ fun catAndLen a b =
+ <xml>
+ <pre>{[strcat a b]}</pre>
+ <pre>{[strlen (strcat a b)]}</pre>
+ </xml>
+ in
+ return <xml>
+ <body>
+ {catAndLen "" ""}
+ {catAndLen "aa" "bb"}
+ {catAndLen "" "bb"}
+ {catAndLen "aa" ""}
+ {catAndLen "àà" "áá"}
+ {catAndLen "" "áá"}
+ {catAndLen "àà" ""}
+ </body>
+ </xml>
+end
+
+fun strsubs () : transaction page =
+ return <xml>
+ <body>
+ <pre>{[strsub "abàç" 0]}</pre>
+ <pre>{[strsub "abàç" 1]}</pre>
+ <pre>{[strsub "abàç" 2]}</pre>
+ <pre>{[strsub "abàç" 3]}</pre>
+ </body>
+ </xml>
+
+fun strsuffixs () : transaction page =
+ return <xml>
+ <body>
+ <pre>{[strsuffix "abàç" 0]}</pre>
+ <pre>{[strsuffix "abàç" 1]}</pre>
+ <pre>{[strsuffix "abàç" 2]}</pre>
+ <pre>{[strsuffix "abàç" 3]}</pre>
+ </body>
+ </xml>
+
+fun strchrs () : transaction page =
+ let
+ fun optToStr ms =
+ case ms of
+ None => "None"
+ | Some s => "Some \"" ^ s ^ "\""
+
+ in
+ return <xml>
+ <body>
+ <pre>{[optToStr (strchr "abàç" #"c")]}</pre>
+ <pre>{[optToStr (strchr "abàç" #"a")]}</pre>
+ <pre>{[optToStr (strchr "abàç" #"b")]}</pre>
+ <pre>{[optToStr (strchr "abàç" (strsub "à" 0))]}</pre>
+ <pre>{[optToStr (strchr "abàç" (strsub "ç" 0))]}</pre>
+ </body>
+ </xml>
+ end
+
+fun strindexs () : transaction page =
+ let
+ fun optToStr ms =
+ case ms of
+ None => "None"
+ | Some s => "Some " ^ (show s)
+
+ in
+ return <xml>
+ <body>
+ <pre>{[optToStr (strindex "abàç" #"c")]}</pre>
+ <pre>{[optToStr (strindex "abàç" #"a")]}</pre>
+ <pre>{[optToStr (strindex "abàç" #"b")]}</pre>
+ <pre>{[optToStr (strindex "abàç" (strsub "à" 0))]}</pre>
+ <pre>{[optToStr (strindex "abàç" (strsub "ç" 0))]}</pre>
+ </body>
+ </xml>
+ end
+
+fun strsindexs () : transaction page =
+ let
+ fun optToStr ms =
+ case ms of
+ None => "None"
+ | Some s => "Some " ^ (show s)
+
+ in
+ return <xml>
+ <body>
+ <pre>{[optToStr (strsindex "abàç" "")]}</pre>
+ <pre>{[optToStr (strsindex "abàç" "abàç")]}</pre>
+ <pre>{[optToStr (strsindex "abàç" "abàc")]}</pre>
+ <pre>{[optToStr (strsindex "abàç" "bàç")]}</pre>
+ <pre>{[optToStr (strsindex "abàç" "bàc")]}</pre>
+ <pre>{[optToStr (strsindex "abàç" "àç")]}</pre>
+ <pre>{[optToStr (strsindex "abàç" "àc")]}</pre>
+ <pre>{[optToStr (strsindex "abàç" "ac")]}</pre>
+ <pre>{[optToStr (strsindex "abàç" "ç")]}</pre>
+ </body>
+ </xml>
+ end
+
+fun strcspns () : transaction page =
+ return <xml>
+ <body>
+ <pre>{[strcspn "abàç" ""]}</pre>
+ <pre>{[strcspn "abàç" "abàç"]}</pre>
+ <pre>{[strcspn "abàç" "a"]}</pre>
+ <pre>{[strcspn "abàç" "bàç"]}</pre>
+ <pre>{[strcspn "abàç" "àç"]}</pre>
+ <pre>{[strcspn "abàç" "ç"]}</pre>
+ </body>
+ </xml>
+
+fun str1s () : transaction page = return <xml>
+ <body>
+ <pre>{[str1 #"a"]}</pre>
+ <pre>{[str1 (strsub "à" 0)]}</pre>
+ <pre>{[str1 (strsub "aá" 1)]}</pre>
+ </body>
+ </xml>
+
+fun isalnums () : transaction page = return <xml>
+ <body>
+ <pre>{[isalnum #"a"]}</pre>
+ <pre>{[isalnum (strsub "à" 0)]}</pre>
+ <pre>{[isalnum #"A"]}</pre>
+ <pre>{[isalnum (strsub "À" 0)]}</pre>
+ <pre>{[isalnum #"1"]}</pre>
+ <pre>{[not (isalnum #"!")]}</pre>
+ <pre>{[not (isalnum #"#")]}</pre>
+ <pre>{[not (isalnum #" ")]}</pre>
+ </body>
+</xml>
+
+fun isalphas () : transaction page = return <xml>
+ <body>
+ <pre>{[isalpha #"a"]}</pre>
+ <pre>{[isalpha (strsub "à" 0)]}</pre>
+ <pre>{[isalpha #"A"]}</pre>
+ <pre>{[isalpha (strsub "À" 0)]}</pre>
+ <pre>{[not (isalpha #"1")]}</pre>
+ <pre>{[not (isalpha #"!")]}</pre>
+ <pre>{[not (isalpha #"#")]}</pre>
+ <pre>{[not (isalpha #" ")]}</pre>
+ </body>
+</xml>
+
+fun isblanks () : transaction page =
+ return <xml>
+ <body>
+ <pre>{[not (isblank #"a")]}</pre>
+ <pre>{[not (isblank (strsub "à" 0))]}</pre>
+ <pre>{[not (isblank #"A")]}</pre>
+ <pre>{[not (isblank (strsub "À" 0))]}</pre>
+ <pre>{[not (isblank #"1")]}</pre>
+ <pre>{[not (isblank #"!")]}</pre>
+ <pre>{[not (isblank #"#")]}</pre>
+ <pre>{[isblank #" "]}</pre>
+ <pre>{[isblank #"\t"]}</pre>
+ <pre>{[not (isblank #"\n")]}</pre>
+ </body>
+ </xml>
+
+fun iscntrls () : transaction page =
+ return <xml>
+ <body>
+ <pre>{[not (iscntrl #"a")]}</pre>
+ <pre>{[not (iscntrl (strsub "à" 0))]}</pre>
+ <pre>{[not (iscntrl #"A")]}</pre>
+ <pre>{[not (iscntrl (strsub "À" 0))]}</pre>
+ <pre>{[not (iscntrl #"1")]}</pre>
+ <pre>{[not (iscntrl #"!")]}</pre>
+ <pre>{[not (iscntrl #"#")]}</pre>
+ <pre>{[not (iscntrl #" ")]}</pre>
+ <pre>{[iscntrl #"\t"]}</pre>
+ <pre>{[iscntrl #"\n"]}</pre>
+ </body>
+ </xml>
+
+fun isdigits () : transaction page =
+ return <xml>
+ <body>
+ <pre>{[not (isdigit #"a")]}</pre>
+ <pre>{[not (isdigit (strsub "à" 0))]}</pre>
+ <pre>{[not (isdigit #"A")]}</pre>
+ <pre>{[not (isdigit (strsub "À" 0))]}</pre>
+ <pre>{[isdigit #"1"]}</pre>
+ <pre>{[not (isdigit #"!")]}</pre>
+ <pre>{[not (isdigit #"#")]}</pre>
+ <pre>{[not (isdigit #" ")]}</pre>
+ <pre>{[not (isdigit #"\t")]}</pre>
+ <pre>{[not (isdigit #"\n")]}</pre>
+ </body>
+ </xml>
+
+(* isgraph: letters, digits and punctuation are graphic; space, tab and
+   newline are not.  Every <pre> is expected to render True. *)
+fun isgraphs () : transaction page =
+    return <xml>
+      <body>
+        <pre>{[isgraph #"a"]}</pre>
+        <pre>{[isgraph (strsub "à" 0)]}</pre>
+        <pre>{[isgraph #"A"]}</pre>
+        <pre>{[isgraph (strsub "À" 0)]}</pre>
+        <pre>{[isgraph #"1"]}</pre>
+        <pre>{[isgraph #"!"]}</pre>
+        <pre>{[isgraph #"#"]}</pre>
+        <pre>{[not (isgraph #" ")]}</pre>
+        <pre>{[not (isgraph #"\t")]}</pre>
+        <pre>{[not (isgraph #"\n")]}</pre>
+      </body>
+    </xml>
+
+fun islowers () : transaction page = (* Test page for islower: one <pre> per check; #"a" and "à" are checked positively, upper-case letters, digit, punctuation and whitespace are wrapped in not. *)
+ return <xml>
+ <body>
+ <pre>{[islower #"a"]}</pre>
+ <pre>{[islower (strsub "à" 0)]}</pre>
+ <pre>{[not (islower #"A")]}</pre>
+ <pre>{[not (islower (strsub "À" 0))]}</pre>
+ <pre>{[not (islower #"1")]}</pre>
+ <pre>{[not (islower #"!")]}</pre>
+ <pre>{[not (islower #"#")]}</pre>
+ <pre>{[not (islower #" ")]}</pre>
+ <pre>{[not (islower #"\t")]}</pre>
+ <pre>{[not (islower #"\n")]}</pre>
+ </body>
+ </xml>
+
+fun isprints () : transaction page = (* Test page for isprint: one <pre> per check; letters, digit, punctuation and space are checked positively, tab and newline are wrapped in not. *)
+ return <xml>
+ <body>
+ <pre>{[isprint #"a"]}</pre>
+ <pre>{[isprint (strsub "à" 0)]}</pre>
+ <pre>{[isprint #"A"]}</pre>
+ <pre>{[isprint (strsub "À" 0)]}</pre>
+ <pre>{[isprint #"1"]}</pre>
+ <pre>{[isprint #"!"]}</pre>
+ <pre>{[isprint #"#"]}</pre>
+ <pre>{[isprint #" "]}</pre>
+ <pre>{[not (isprint #"\t")]}</pre>
+ <pre>{[not (isprint #"\n")]}</pre>
+ </body>
+ </xml>
+
+fun ispuncts () : transaction page = (* Test page for ispunct: one <pre> per check; #"!" and #"#" are checked positively, letters, digit, space, tab and newline are wrapped in not. Every row calls ispunct itself. *)
+ return <xml>
+ <body>
+ <pre>{[not (ispunct #"a")]}</pre>
+ <pre>{[not (ispunct (strsub "à" 0))]}</pre>
+ <pre>{[not (ispunct #"A")]}</pre>
+ <pre>{[not (ispunct (strsub "À" 0))]}</pre>
+ <pre>{[not (ispunct #"1")]}</pre>
+ <pre>{[ispunct #"!"]}</pre>
+ <pre>{[ispunct #"#"]}</pre>
+ <pre>{[not (ispunct #" ")]}</pre>
+ <pre>{[not (ispunct #"\t")]}</pre>
+ <pre>{[not (ispunct #"\n")]}</pre>
+ </body>
+ </xml>
+
+fun isspaces () : transaction page = (* Test page for isspace: one <pre> per check; space, tab and newline are checked positively, letters, digit and punctuation are wrapped in not. *)
+ return <xml>
+ <body>
+ <pre>{[not (isspace #"a")]}</pre>
+ <pre>{[not (isspace (strsub "à" 0))]}</pre>
+ <pre>{[not (isspace #"A")]}</pre>
+ <pre>{[not (isspace (strsub "À" 0))]}</pre>
+ <pre>{[not (isspace #"1")]}</pre>
+ <pre>{[not (isspace #"!")]}</pre>
+ <pre>{[not (isspace #"#")]}</pre>
+ <pre>{[isspace #" "]}</pre>
+ <pre>{[isspace #"\t"]}</pre>
+ <pre>{[isspace #"\n"]}</pre>
+ </body>
+ </xml>
+
+fun isuppers () : transaction page = (* Test page for isupper: one <pre> per check; #"A" and "À" are checked positively, lower-case letters, digit, punctuation and whitespace are wrapped in not. *)
+ return <xml>
+ <body>
+ <pre>{[not (isupper #"a")]}</pre>
+ <pre>{[not (isupper (strsub "à" 0))]}</pre>
+ <pre>{[isupper #"A"]}</pre>
+ <pre>{[isupper (strsub "À" 0)]}</pre>
+ <pre>{[not (isupper #"1")]}</pre>
+ <pre>{[not (isupper #"!")]}</pre>
+ <pre>{[not (isupper #"#")]}</pre>
+ <pre>{[not (isupper #" ")]}</pre>
+ <pre>{[not (isupper #"\t")]}</pre>
+ <pre>{[not (isupper #"\n")]}</pre>
+ </body>
+ </xml>
+
+fun isxdigits () : transaction page = (* Test page for isxdigit: one <pre> per check; #"a", #"A" and #"1" are checked positively, accented letters, punctuation and whitespace are wrapped in not. *)
+ return <xml>
+ <body>
+ <pre>{[isxdigit #"a"]}</pre>
+ <pre>{[not (isxdigit (strsub "à" 0))]}</pre>
+ <pre>{[isxdigit #"A"]}</pre>
+ <pre>{[not (isxdigit (strsub "À" 0))]}</pre>
+ <pre>{[isxdigit #"1"]}</pre>
+ <pre>{[not (isxdigit #"!")]}</pre>
+ <pre>{[not (isxdigit #"#")]}</pre>
+ <pre>{[not (isxdigit #" ")]}</pre>
+ <pre>{[not (isxdigit #"\t")]}</pre>
+ <pre>{[not (isxdigit #"\n")]}</pre>
+ </body>
+ </xml>
+
+fun tolowers () : transaction page = (* Test page for tolower: one <pre> per equality check comparing tolower of an ASCII letter, an accented letter (taken with strsub ... 0) or a digit against the character written on the right-hand side. *)
+ return <xml>
+ <body>
+ <pre>{[tolower #"A" = #"a"]}</pre>
+ <pre>{[tolower #"a" = #"a"]}</pre>
+ <pre>{[tolower (strsub "á" 0) = (strsub "á" 0)]}</pre>
+ <pre>{[tolower (strsub "Á" 0) = (strsub "á" 0)]}</pre>
+ <pre>{[tolower #"1" = #"1"]}</pre>
+ </body>
+ </xml>
+
+fun touppers () : transaction page = (* Test page for toupper: one <pre> per equality check comparing toupper of an ASCII letter, an accented letter (taken with strsub ... 0) or a digit against the character written on the right-hand side. *)
+ return <xml>
+ <body>
+ <pre>{[toupper #"A" = #"A"]}</pre>
+ <pre>{[toupper #"a" = #"A"]}</pre>
+ <pre>{[toupper (strsub "á" 0) = (strsub "Á" 0)]}</pre>
+ <pre>{[toupper (strsub "Á" 0) = (strsub "Á" 0)]}</pre>
+ <pre>{[toupper #"1" = #"1"]}</pre>
+ </body>
+ </xml>
+
+fun ord_and_chrs () : transaction page = (* Test page for ord/chr: one <pre> per check that chr (ord c) = c, for ASCII letters, a digit, newline, accented Latin letters and three Japanese characters (each non-ASCII character taken with strsub ... 0). *)
+ return <xml>
+ <body>
+ <pre>{[chr (ord #"A") = #"A"]}</pre>
+ <pre>{[chr (ord #"a") = #"a"]}</pre>
+ <pre>{[chr (ord (strsub "á" 0)) = (strsub "á" 0)]}</pre>
+ <pre>{[chr (ord (strsub "Á" 0)) = (strsub "Á" 0)]}</pre>
+ <pre>{[chr (ord #"1") = #"1"]}</pre>
+ <pre>{[chr (ord #"\n") = #"\n"]}</pre>
+ <pre>{[chr (ord (strsub "が" 0)) = (strsub "が" 0)]}</pre>
+ <pre>{[chr (ord (strsub "漢" 0)) = (strsub "漢" 0)]}</pre>
+ <pre>{[chr (ord (strsub "カ" 0)) = (strsub "カ" 0)]}</pre>
+ </body>
+ </xml>
+
+table t : { Id : int, Text : string } (* SQL table used by test_db below: an integer Id and a Text string; no key constraint is declared here. *)
+
+
+fun test_db () : transaction page = (* Database round-trip page: inserts six rows into t (Ids 1-6: ASCII, accented Latin, three Japanese and one Arabic string), reads each back by Id with oneRow, then renders each Text followed by its strlen. NOTE(review): nothing here deletes existing rows, so repeated runs presumably accumulate duplicate Ids - confirm the harness resets the database. *)
+ dml (INSERT INTO t (Id, Text) VALUES({[1]}, {["abc"]}));
+ t1 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 1);
+
+ dml (INSERT INTO t (Id, Text) VALUES({[2]}, {["çãó"]}));
+ t2 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 2);
+
+ dml (INSERT INTO t (Id, Text) VALUES({[3]}, {["が"]}));
+ t3 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 3);
+
+ dml (INSERT INTO t (Id, Text) VALUES({[4]}, {["漢"]}));
+ t4 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 4);
+
+ dml (INSERT INTO t (Id, Text) VALUES({[5]}, {["カ"]}));
+ t5 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 5);
+
+ dml (INSERT INTO t (Id, Text) VALUES({[6]}, {["وظيفية"]}));
+ t6 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 6);
+
+ return <xml>
+ <body>
+ <pre>{[t1.T.Text]}</pre>
+ <pre>{[strlen t1.T.Text]}</pre>
+ <pre>{[t2.T.Text]}</pre>
+ <pre>{[strlen t2.T.Text]}</pre>
+ <pre>{[t3.T.Text]}</pre>
+ <pre>{[strlen t3.T.Text]}</pre>
+ <pre>{[t4.T.Text]}</pre>
+ <pre>{[strlen t4.T.Text]}</pre>
+ <pre>{[t5.T.Text]}</pre>
+ <pre>{[strlen t5.T.Text]}</pre>
+ <pre>{[t6.T.Text]}</pre>
+ <pre>{[strlen t6.T.Text]}</pre>
+ </body>
+ </xml>
diff --git a/tests/utf8.urp b/tests/utf8.urp
new file mode 100644
index 00000000..9b3067af
--- /dev/null
+++ b/tests/utf8.urp
@@ -0,0 +1,5 @@
+database dbname=utf8
+sql utf8.sql
+safeGet Utf8/test_db
+
+utf8
\ No newline at end of file