9 files changed, 1024 insertions, 71 deletions
diff --git a/.travis.yml b/.travis.yml
index df4e4abc..86d731cc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -18,7 +18,7 @@ compiler:
 before_install:
   - export CONFIGURE_ARGS=""
   - if command -v apt-get &>/dev/null; then sudo apt-get update -qq; fi
-  - if command -v apt-get &>/dev/null; then sudo apt-get install -y mlton; fi
+  - if command -v apt-get &>/dev/null; then sudo apt-get install -y mlton libicu-dev; fi  # libicu-dev supplies the ICU headers and libraries
   - if command -v brew &>/dev/null; then brew update; fi
   - if command -v brew &>/dev/null; then brew uninstall libtool; fi
   - if command -v brew &>/dev/null; then brew install libtool; fi
diff --git a/include/urweb/types_cpp.h b/include/urweb/types_cpp.h
index 0c546d1c..c6c0dd3e 100644
--- a/include/urweb/types_cpp.h
+++ b/include/urweb/types_cpp.h
@@ -4,11 +4,12 @@
 #include <time.h>
 #include <unistd.h>
 #include <stdint.h>
+#include <unicode/utypes.h>
 
 typedef long long uw_Basis_int;
 typedef double uw_Basis_float;
 typedef char* uw_Basis_string;
-typedef char uw_Basis_char;
+typedef UChar32 uw_Basis_char; /* was a single char; now ICU's UChar32, so sizeof(uw_Basis_char) is no longer 1 */
 typedef struct {
   time_t seconds;
   unsigned microseconds;
diff --git a/src/c/Makefile.am b/src/c/Makefile.am
index 027b1458..96c1d92f 100644
--- a/src/c/Makefile.am
+++ b/src/c/Makefile.am
@@ -11,7 +11,7 @@ AM_CFLAGS = -Wall -Wunused-parameter -Werror -Wno-format-security -Wno-deprecate
 liburweb_la_LDFLAGS = $(AM_LDFLAGS) $(OPENSSL_LDFLAGS) \
 	-export-symbols-regex '^(client_pruner|pthread_create_big|strcmp_nullsafe|uw_.*)' \
 	-version-info 1:0:0
-liburweb_la_LIBADD = $(PTHREAD_LIBS) -lm $(OPENSSL_LIBS)
+liburweb_la_LIBADD = $(PTHREAD_LIBS) -lm $(OPENSSL_LIBS) -licui18n -licuuc -licudata
 liburweb_http_la_LIBADD = liburweb.la
 liburweb_http_la_LDFLAGS = -export-symbols-regex '^(main|uw_.*)' \
 	-version-info 1:0:0
diff --git a/src/c/urweb.c b/src/c/urweb.c
index 2e3e18bc..69c3da94 100644
--- a/src/c/urweb.c
+++ b/src/c/urweb.c
@@ -20,6 +20,9 @@
 
 #include <pthread.h>
 
+#include <unicode/utf8.h>
+#include <unicode/uchar.h>
+
 #include "types.h"
 
 #include "uthash.h"
@@ -2421,28 +2424,34 @@ uw_unit uw_Basis_htmlifySource_w(uw_context ctx, uw_Basis_source src) {
   return uw_unit_v;
 }
 
-uw_Basis_char uw_Basis_strsub(uw_context ctx, uw_Basis_string s, uw_Basis_int n) {
+uw_Basis_char uw_Basis_strsub(uw_context ctx, uw_Basis_string s, uw_Basis_int n) {  /* return the n-th (0-based) code point of UTF-8 string s */
+  uw_Basis_char c;
+  int offset = 0; /* byte position in s; n counts code points, not bytes */
+  /* Each iteration decodes one code point; the one decoded when n reaches 0 is the result. */
   while (n >= 0) {
-    if (*s == 0)
+    /* Reaching the terminator before n hits 0 means the index is past the end. */
+    if (s[offset] == 0)
       uw_error(ctx, FATAL, "Out-of-bounds strsub");
 
+    U8_NEXT(s, offset, -1, c); /* decode the code point at offset into c and advance offset past it */
+    
     if (n == 0)
-      return *s;
+      return c;
 
     --n;
-    ++s;
   }
 
   uw_error(ctx, FATAL, "Negative strsub bound");
 }
 
 uw_Basis_string uw_Basis_strsuffix(uw_context ctx, uw_Basis_string s, uw_Basis_int n) {
+  int offset = 0; /* byte position in s after skipping code points */
   while (n >= 0) {
-    if (*s == 0 || n == 0)
-      return s;
+    if (s[offset] == 0 || n == 0) /* stop at end of string or once n code points were skipped */
+      return s + offset; /* points into s itself; nothing is copied */
 
+    U8_FWD_1(s, offset, -1); /* advance offset over one whole code point */
     --n;
-    ++s;
   }
 
   uw_error(ctx, FATAL, "Negative strsuffix bound");
@@ -2450,40 +2459,80 @@ uw_Basis_string uw_Basis_strsuffix(uw_context ctx, uw_Basis_string s, uw_Basis_i
 
 uw_Basis_int uw_Basis_strlen(uw_context ctx, uw_Basis_string s) {
   (void)ctx;
-  return strlen(s);
+  int offset = 0, iterations = 0; /* offset: byte position; iterations: code points seen so far */
+  while (s[offset] != 0) {
+    U8_FWD_1(s, offset, -1); /* step over one code point, however many bytes it spans */
+    ++iterations;
+  }
+  return iterations; /* length in code points, no longer the byte count strlen() gave */
 }
 
 uw_Basis_bool uw_Basis_strlenGe(uw_context ctx, uw_Basis_string s, uw_Basis_int n) {
   (void)ctx;
-
+  int offset = 0; /* byte position in s */
   while (n > 0) {
-    if (*s == 0)
+    if (s[offset] == 0) /* string ended before n code points were consumed */
       return uw_Basis_False;
-
+    /* Consume one code point per remaining unit of n. */
+    U8_FWD_1(s, offset, -1);
     --n;
-    ++s;
   }
 
   return uw_Basis_True;
 }
 
+static int aux_strchr(uw_Basis_string s, uw_Basis_char ch, int* o_offset) { /* file-local helper: find the first code point equal to ch in UTF-8 string s */
+  int u8idx = 0, offset = 0; /* u8idx: code-point index; offset: byte position */
+  uw_Basis_char c;
+  /* Returns the 0-based code-point index of the match, or -1 (with *o_offset = -1) when ch does not occur. */
+  while (s[offset] != 0) {
+    U8_NEXT(s, offset, -1, c); /* decode one code point; offset now points just past it */
+    if (c == ch) {
+      *o_offset = offset; /* NOTE(review): this is the byte offset AFTER the match, not of the match itself - confirm callers expect that */
+      return u8idx;
+    }
+
+    ++u8idx;
+  }
+
+  *o_offset = -1;
+  return -1;
+}
+
 uw_Basis_string uw_Basis_strchr(uw_context ctx, uw_Basis_string s, uw_Basis_char ch) {
   (void)ctx;
-  return strchr(s, ch);
+  int offset = -1; /* filled in by aux_strchr on a match */
+  if (aux_strchr(s, ch, &offset) > -1) {
+    return s + offset; /* NOTE(review): aux_strchr stores the offset just past the matched code point, so this is the text after ch, whereas strchr() pointed at ch - confirm intended */
+  }
+  return NULL;  
 }
 
 uw_Basis_int uw_Basis_strcspn(uw_context ctx, uw_Basis_string s, uw_Basis_string chs) {
   (void)ctx;
-  return strcspn(s, chs);
+  int offset = 0, u8idx = 0, offsetChs = 0; /* offsetChs only receives aux_strchr's out-parameter; its value is not used */
+  uw_Basis_char c;
+  /* Count the leading code points of s that do not occur anywhere in chs. */
+  while (s[offset] != 0) {
+    U8_NEXT(s, offset, -1, c); /* next code point of s */
+    if (aux_strchr(chs, c, &offsetChs) > -1) {
+      return u8idx; /* first code point of s that is a member of chs */
+    }
+    ++u8idx;
+  }
+
+  return u8idx; /* no member of chs found: the whole length in code points */
 }
 
 uw_Basis_int *uw_Basis_strindex(uw_context ctx, uw_Basis_string s, uw_Basis_char ch) {
-  uw_Basis_string r = strchr(s, ch);
-  if (r == NULL)
+  (void)ctx;
+  int offset = -1; /* byte offset out-parameter required by aux_strchr; not used here */
+  int r = aux_strchr(s, ch, &offset); /* code-point index of the first ch, or -1 */
+  if (r == -1)
     return NULL;
   else {
     uw_Basis_int *nr = uw_malloc(ctx, sizeof(uw_Basis_int));
-    *nr = r - s;
+    *nr = r; /* index is in code points, not the byte distance the old pointer subtraction gave */
     return nr;
   }
 }
@@ -2494,13 +2543,19 @@ uw_Basis_int *uw_Basis_strsindex(uw_context ctx, const char *haystack, const cha
     return NULL;
   else {
     uw_Basis_int *nr = uw_malloc(ctx, sizeof(uw_Basis_int));
-    *nr = r - haystack;
+    int src = r - haystack, offset = 0, utf8idx = 0; /* src: byte distance from haystack to r */
+    while (offset < src) {
+      U8_FWD_1(haystack, offset, -1); /* step one code point at a time up to the byte position src */
+      ++utf8idx;
+    }
+    /* utf8idx now holds the number of code points that precede r. */
+    *nr = utf8idx;
     return nr;
   }
 }
 
 uw_Basis_string uw_Basis_strcat(uw_context ctx, uw_Basis_string s1, uw_Basis_string s2) {
-  int len = uw_Basis_strlen(ctx, s1) + uw_Basis_strlen(ctx, s2) + 1;
+  int len = strlen(s1) + strlen(s2) + 1; /* byte count via strlen(): uw_Basis_strlen now counts code points, which would be too small here */
   char *s;
 
   uw_check_heap(ctx, len);
@@ -2515,8 +2570,8 @@ uw_Basis_string uw_Basis_strcat(uw_context ctx, uw_Basis_string s1, uw_Basis_str
 }
 
 uw_Basis_string uw_Basis_substring(uw_context ctx, uw_Basis_string s, uw_Basis_int start, uw_Basis_int len) {
-  size_t full_len = uw_Basis_strlen(ctx, s);
-
+  int full_len = uw_Basis_strlen(ctx, s); /* length of s in code points; start and len are in the same unit */
+  
   if (start < 0)
     uw_error(ctx, FATAL, "substring: Negative start index");
   if (len < 0)
@@ -2524,32 +2579,41 @@ uw_Basis_string uw_Basis_substring(uw_context ctx, uw_Basis_string s, uw_Basis_i
   if (start + len > full_len)
     uw_error(ctx, FATAL, "substring: Start index plus length is too large");
 
-  if (start + len == full_len)
-    return &s[start];
-  else {
-    uw_Basis_string r = uw_malloc(ctx, len+1);
-    memcpy(r, s+start, len);
-    r[len] = 0;
+  int offset = 0;
+  U8_FWD_N(s, offset, -1, start); /* offset becomes the byte position of code point number start */
+  
+  if (start + len == full_len) {
+    return s + offset; /* substring runs to the end of s: return a pointer into s without copying */
+  } else {
+    int end = offset;
+    U8_FWD_N(s, end, -1, len); /* end becomes the byte position just past the last requested code point */
+
+    int actual_len = end - offset; /* substring size in bytes */
+
+    uw_Basis_string r = uw_malloc(ctx, actual_len + 1);
+    memcpy(r, s + offset, actual_len);
+    r[actual_len] = 0;
     return r;
   }
-
 }
 
 uw_Basis_string uw_Basis_str1(uw_context ctx, uw_Basis_char ch) {
   char *r;
-
-  uw_check_heap(ctx, 2);
+  int req = U8_LENGTH(ch); /* bytes needed to encode ch as UTF-8; 0 when ch is a surrogate or not a code point */
+  int offset = 0;
+  if (req == 0) uw_error(ctx, FATAL, "str1: Invalid Unicode code point"); /* U8_APPEND_UNSAFE assumes a valid code point and would write past the reserved bytes */
+  uw_check_heap(ctx, req + 1); /* encoded bytes plus the NUL terminator */
   r = ctx->heap.front;
-  r[0] = ch;
-  r[1] = 0;
 
-  ctx->heap.front += 2;
+  U8_APPEND_UNSAFE(r, offset, ch); /* write the UTF-8 encoding of ch at r */
+  r[req] = 0;
 
-  return r;
+  ctx->heap.front += req + 1; /* advance the heap front past the new string */
+  return r;
 }
 
 uw_Basis_string uw_strdup(uw_context ctx, uw_Basis_string s1) {
-  int len = uw_Basis_strlen(ctx, s1) + 1;
+  int len = strlen(s1) + 1; /* byte count plus terminator; uw_Basis_strlen now counts code points and cannot size the heap check */
   char *s;
 
   uw_check_heap(ctx, len);
@@ -2676,7 +2740,6 @@ uw_Basis_string uw_Basis_sqlifyString(uw_context ctx, uw_Basis_string s) {
 
 uw_Basis_string uw_Basis_sqlifyChar(uw_context ctx, uw_Basis_char c) {
   char *r, *s2;
-
   uw_check_heap(ctx, 5 + uw_Estrings + strlen(uw_sqlsuffixChar));
 
   r = s2 = ctx->heap.front;
@@ -2934,10 +2997,7 @@ uw_Basis_string uw_Basis_floatToString(uw_context ctx, uw_Basis_float n) {
 }
 
 uw_Basis_string uw_Basis_charToString(uw_context ctx, uw_Basis_char ch) {
-  char *r = uw_malloc(ctx, 2);
-  r[0] = ch;
-  r[1] = 0;
-  return r;
+  return uw_Basis_str1(ctx, ch); /* delegate to uw_Basis_str1 instead of storing ch as a single byte */
 }
 
 uw_Basis_string uw_Basis_boolToString(uw_context ctx, uw_Basis_bool b) {
@@ -2997,11 +3057,12 @@ uw_Basis_char *uw_Basis_stringToChar(uw_context ctx, uw_Basis_string s) {
     uw_Basis_char *r = uw_malloc(ctx, 1);
     r[0] = 0;
     return r;
-  } else if (s[1] != 0)
+  } else if (uw_Basis_strlenGe(ctx, s, 2) == uw_Basis_True)
     return NULL;
   else {
     uw_Basis_char *r = uw_malloc(ctx, 1);
-    r[0] = s[0];
+    int offset = 0;
+    U8_NEXT(s, offset, -1, *r);
     return r;
   }
 }
@@ -3126,10 +3187,14 @@ uw_Basis_float uw_Basis_stringToFloat_error(uw_context ctx, uw_Basis_string s) {
 uw_Basis_char uw_Basis_stringToChar_error(uw_context ctx, uw_Basis_string s) {
   if (s[0] == 0)
     return 0;
-  else if (s[1] != 0)
+  else if (uw_Basis_strlenGe(ctx, s, 2) == uw_Basis_True) /* two or more code points cannot be parsed as one char */
     uw_error(ctx, FATAL, "Can't parse char: %s", uw_Basis_htmlifyString(ctx, s));
-  else
-    return s[0];
+  else {
+    uw_Basis_char c;
+    int offset = 0;
+    U8_NEXT(s, offset, -1, c); /* decode the single code point, which may span several bytes */
+    return c;
+  }
 }
 
 uw_Basis_bool uw_Basis_stringToBool_error(uw_context ctx, uw_Basis_string s) {
@@ -4328,82 +4393,82 @@ void uw_set_global(uw_context ctx, char *name, void *data, void (*free)(void*))
 
 uw_Basis_bool uw_Basis_isalnum(uw_context ctx, uw_Basis_char c) {
   (void)ctx;
-  return !!isalnum((int)c);
+  return !!u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM); /* ICU POSIX_ALNUM property replaces <ctype.h> isalnum */
 }
 
 uw_Basis_bool uw_Basis_isalpha(uw_context ctx, uw_Basis_char c) {
   (void)ctx;
-  return !!isalpha((int)c);
+  return !!u_hasBinaryProperty(c, UCHAR_ALPHABETIC); /* ICU ALPHABETIC property */
 }
 
 uw_Basis_bool uw_Basis_isblank(uw_context ctx, uw_Basis_char c) {
   (void)ctx;
-  return !!isblank((int)c);
+  return !!u_hasBinaryProperty(c, UCHAR_POSIX_BLANK); /* ICU POSIX_BLANK property */
 }
 
 uw_Basis_bool uw_Basis_iscntrl(uw_context ctx, uw_Basis_char c) {
   (void)ctx;
-  return !!iscntrl((int)c);
+  return !!(u_charType(c)==U_CONTROL_CHAR); /* true only when ICU classifies c as U_CONTROL_CHAR */
 }
 
 uw_Basis_bool uw_Basis_isdigit(uw_context ctx, uw_Basis_char c) {
   (void)ctx;
-  return !!isdigit((int)c);
+  return !!u_isdigit(c); /* ICU u_isdigit; NOTE(review): presumably also accepts non-ASCII decimal digits - confirm */
 }
 
 uw_Basis_bool uw_Basis_isgraph(uw_context ctx, uw_Basis_char c) {
   (void)ctx;
-  return !!isgraph((int)c);
+  return !!u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH); /* ICU POSIX_GRAPH property */
 }
 
 uw_Basis_bool uw_Basis_islower(uw_context ctx, uw_Basis_char c) {
   (void)ctx;
-  return !!islower((int)c);
+  return !!u_hasBinaryProperty(c, UCHAR_LOWERCASE); /* ICU LOWERCASE property */
 }
 
 uw_Basis_bool uw_Basis_isprint(uw_context ctx, uw_Basis_char c) {
   (void)ctx;
-  return !!isprint((int)c);
+  return !!u_hasBinaryProperty(c, UCHAR_POSIX_PRINT); /* ICU POSIX_PRINT property */
 }
 
 uw_Basis_bool uw_Basis_ispunct(uw_context ctx, uw_Basis_char c) {
   (void)ctx;
-  return !!ispunct((int)c);
+  return !!u_ispunct(c); /* ICU u_ispunct */
 }
 
 uw_Basis_bool uw_Basis_isspace(uw_context ctx, uw_Basis_char c) {
   (void)ctx;
-  return !!isspace((int)c);
+  return !!u_hasBinaryProperty(c, UCHAR_WHITE_SPACE); /* ICU WHITE_SPACE property */
 }
 
 uw_Basis_bool uw_Basis_isupper(uw_context ctx, uw_Basis_char c) {
   (void)ctx;
-  return !!isupper((int)c);
+  return !!u_hasBinaryProperty(c, UCHAR_UPPERCASE); /* ICU UPPERCASE property */
 }
 
 uw_Basis_bool uw_Basis_isxdigit(uw_context ctx, uw_Basis_char c) {
   (void)ctx;
-  return !!isxdigit((int)c);
+  return !!u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT); /* ICU POSIX_XDIGIT property */
 }
 
 uw_Basis_char uw_Basis_tolower(uw_context ctx, uw_Basis_char c) {
   (void)ctx;
-  return tolower((int)c);
+  return u_tolower(c); /* ICU per-code-point lowercase mapping */
 }
 
 uw_Basis_char uw_Basis_toupper(uw_context ctx, uw_Basis_char c) {
   (void)ctx;
-  return toupper((int)c);
+  return u_toupper(c); /* ICU per-code-point uppercase mapping */
 }
 
 uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c) {
   (void)ctx;
-  return (unsigned char)c;
+  return (uw_Basis_int)c; /* the full code point value, no longer truncated to one byte */
 }
 
 uw_Basis_char uw_Basis_chr(uw_context ctx, uw_Basis_int n) {
   (void)ctx;
-  return n;
+  return (uw_Basis_char)n; /* plain cast: n is not checked to be a valid Unicode code point */
 }
 
 uw_Basis_string uw_Basis_currentUrl(uw_context ctx) {
@@ -4657,7 +4722,7 @@ uw_Basis_string uw_Basis_atom(uw_context ctx, uw_Basis_string s) {
 
   for (p = s; *p; ++p) {
     char c = *p;
-    if (!isalnum((int)c) && c != '+' && c != '-' && c != '.' && c != '%' && c != '#')
+    if (!U8_IS_SINGLE(c) || (!isalnum((int)c) && c != '+' && c != '-' && c != '.' && c != '%' && c != '#')) /* reject bytes of multi-byte sequences, and any ASCII byte outside the allowed set; with && every ASCII byte was accepted */
       uw_error(ctx, FATAL, "Disallowed character in CSS atom");
   }
 
@@ -4669,7 +4734,7 @@ uw_Basis_string uw_Basis_css_url(uw_context ctx, uw_Basis_string s) {
 
   for (p = s; *p; ++p) {
     char c = *p;
-    if (!isalnum((int)c) && c != ':' && c != '/' && c != '.' && c != '_' && c != '+'
-        && c != '-' && c != '%' && c != '?' && c != '&' && c != '=' && c != '#')
+    if (!U8_IS_SINGLE(c) || (!isalnum((int)c) && c != ':' && c != '/' && c != '.' && c != '_' && c != '+'
+        && c != '-' && c != '%' && c != '?' && c != '&' && c != '=' && c != '#')) /* reject non-ASCII bytes and disallowed ASCII; with && every ASCII byte was accepted */
       uw_error(ctx, FATAL, "Disallowed character in CSS URL");
   }
@@ -4688,7 +4753,7 @@ uw_Basis_string uw_Basis_property(uw_context ctx, uw_Basis_string s) {
 
   for (p = s; *p; ++p) {
     char c = *p;
-    if (!islower((int)c) && !isdigit((int)c) && c != '_' && c != '-')
+    if (!U8_IS_SINGLE(c) || (!islower((int)c) && !isdigit((int)c) && c != '_' && c != '-')) /* reject non-ASCII bytes and disallowed ASCII; with && every ASCII byte was accepted */
       uw_error(ctx, FATAL, "Disallowed character in CSS property");
   }
 
@@ -5064,7 +5129,7 @@ void uw_Sqlcache_flush(uw_context ctx, uw_Sqlcache_Cache *cache, char **keys) {
   pthread_rwlock_unlock(&cache->lockIn);
 }
 
-int strcmp_nullsafe(const char *str1, const char *str2) {
+int strcmp_nullsafe(const char *str1, const char *str2) {  
   if (str1)
     return strcmp(str1, str2);
   else
@@ -5073,7 +5138,7 @@ int strcmp_nullsafe(const char *str1, const char *str2) {
 
 static int is_valid_hash(uw_Basis_string hash) {
   for (; *hash; ++hash)
-    if (!isxdigit(*hash))
+    if (!U8_IS_SINGLE(*hash) || !isxdigit(*hash)) /* invalid if the byte is non-ASCII or not a hex digit; with && every ASCII byte passed as valid */
       return 0;
 
   return 1;
diff --git a/src/compiler.sml b/src/compiler.sml
index f724bf56..9ee88c9b 100644
--- a/src/compiler.sml
+++ b/src/compiler.sml
@@ -1585,9 +1585,9 @@ fun compileC {cname, oname, ename, libs, profile, debug, linker, link = link'} =
         val proto = Settings.currentProtocol ()
 
         val lib = if Settings.getBootLinking () then
-                      !Settings.configLib ^ "/" ^ #linkStatic proto ^ " " ^ !Settings.configLib ^ "/liburweb.a"
+                      !Settings.configLib ^ "/" ^ #linkStatic proto ^ " " ^ !Settings.configLib ^ "/liburweb.a -licui18n -licuuc -licudata"
                   else if Settings.getStaticLinking () then
-                      " -static " ^ !Settings.configLib ^ "/" ^ #linkStatic proto ^ " " ^ !Settings.configLib ^ "/liburweb.a"
+                      " -static " ^ !Settings.configLib ^ "/" ^ #linkStatic proto ^ " " ^ !Settings.configLib ^ "/liburweb.a -licui18n -licuuc -licudata"
                   else
                       "-L" ^ !Settings.configLib ^ " " ^ #linkDynamic proto ^ " -lurweb"
 
diff --git a/tests/Makefile b/tests/Makefile
index ecf5557b..03e37e4b 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -28,3 +28,5 @@ simple::
 	./driver.sh fact
 	./driver.sh filter
 	./driver.sh jsbspace
+	./driver.sh utf8
+
diff --git a/tests/utf8.py b/tests/utf8.py
new file mode 100644
index 00000000..ff9b737a
--- /dev/null
+++ b/tests/utf8.py
@@ -0,0 +1,449 @@
+import unittest
+import base
+
+class Suite(base.Base):
+    def test_1(self):
+        """Test case: substring (1)"""
+        self.start('Utf8/substrings')
+
+        pre = self.xpath('pre[1]')
+        self.assertEqual('abc', pre.text)
+
+        pre = self.xpath('pre[2]')
+        self.assertEqual('bc', pre.text)
+
+        pre = self.xpath('pre[3]')
+        self.assertEqual('c', pre.text)
+
+        pre = self.xpath('pre[4]')
+        self.assertEqual('ábó', pre.text)
+
+        pre = self.xpath('pre[5]')
+        self.assertEqual('bó', pre.text)
+
+        pre = self.xpath('pre[6]')
+        self.assertEqual('ó', pre.text)
+        
+        pre = self.xpath('pre[7]')
+        self.assertEqual('çãó', pre.text)
+
+        pre = self.xpath('pre[8]')
+        self.assertEqual('ãó', pre.text)
+
+        pre = self.xpath('pre[9]')
+        self.assertEqual('ó', pre.text)
+
+        pre = self.xpath('pre[10]')
+        self.assertEqual('', pre.text)
+
+        pre = self.xpath('pre[11]')
+        self.assertEqual('', pre.text)
+
+        
+    def test_2(self):
+        """Test case: strlen (2)"""
+        self.start('Utf8/strlens')
+
+        pre = self.xpath('pre[1]')
+        self.assertEqual('3', pre.text)
+
+        pre = self.xpath('pre[2]')
+        self.assertEqual('3', pre.text)
+
+        pre = self.xpath('pre[3]')
+        self.assertEqual('3', pre.text)
+
+        pre = self.xpath('pre[4]')
+        self.assertEqual('3', pre.text)
+
+        pre = self.xpath('pre[5]')
+        self.assertEqual('1', pre.text)
+
+        pre = self.xpath('pre[6]')
+        self.assertEqual('1', pre.text)
+
+        pre = self.xpath('pre[7]')
+        self.assertEqual('0', pre.text)
+        
+        pre = self.xpath('pre[8]')
+        self.assertEqual('1', pre.text)
+        
+        pre = self.xpath('pre[9]')
+        self.assertEqual('1', pre.text)
+
+        pre = self.xpath('pre[10]')
+        self.assertEqual('1', pre.text)
+
+        pre = self.xpath('pre[11]')
+        self.assertEqual('6', pre.text)
+
+        pre = self.xpath('pre[12]')
+        self.assertEqual('2', pre.text)
+
+        pre = self.xpath('pre[13]')
+        self.assertEqual('14', pre.text)
+
+        
+    def test_3(self):
+        """Test case: strlenGe (3)"""
+        self.start('Utf8/strlenGens')
+        
+        pre = self.xpath('pre[1]')
+        self.assertEqual('False', pre.text)
+
+        pre = self.xpath('pre[2]')
+        self.assertEqual('True', pre.text)
+
+        pre = self.xpath('pre[3]')
+        self.assertEqual('False', pre.text)
+
+        pre = self.xpath('pre[4]')
+        self.assertEqual('True', pre.text)
+
+        pre = self.xpath('pre[5]')
+        self.assertEqual('True', pre.text)
+
+        pre = self.xpath('pre[6]')
+        self.assertEqual('False', pre.text)
+
+        pre = self.xpath('pre[7]')
+        self.assertEqual('True', pre.text)
+
+        pre = self.xpath('pre[8]')
+        self.assertEqual('True', pre.text)
+
+    def test_4(self):
+        """Test case: strcat (4)"""
+        self.start('Utf8/strcats')
+        
+        pre = self.xpath('pre[1]')
+        self.assertEqual('', pre.text)
+        
+        pre = self.xpath('pre[2]')
+        self.assertEqual('0', pre.text)
+
+        pre = self.xpath('pre[3]')
+        self.assertEqual('aabb', pre.text)
+        
+        pre = self.xpath('pre[4]')
+        self.assertEqual('4', pre.text)
+        
+        pre = self.xpath('pre[5]')
+        self.assertEqual('bb', pre.text)
+        
+        pre = self.xpath('pre[6]')
+        self.assertEqual('2', pre.text)
+        
+        pre = self.xpath('pre[7]')
+        self.assertEqual('aa', pre.text)
+        
+        pre = self.xpath('pre[8]')
+        self.assertEqual('2', pre.text)
+        
+        pre = self.xpath('pre[9]')
+        self.assertEqual('ààáá', pre.text)
+        
+        pre = self.xpath('pre[10]')
+        self.assertEqual('4', pre.text)
+        
+        pre = self.xpath('pre[11]')
+        self.assertEqual('áá', pre.text)
+        
+        pre = self.xpath('pre[12]')
+        self.assertEqual('2', pre.text)
+        
+        pre = self.xpath('pre[13]')
+        self.assertEqual('àà', pre.text)
+        
+        pre = self.xpath('pre[14]')
+        self.assertEqual('2', pre.text)
+
+    def test_5(self):
+        """Test case: strsub (5)"""
+        self.start('Utf8/strsubs')
+
+        pre = self.xpath('pre[1]')
+        self.assertEqual('a', pre.text)
+
+        pre = self.xpath('pre[2]')
+        self.assertEqual('b', pre.text)
+
+        pre = self.xpath('pre[3]')
+        self.assertEqual('à', pre.text)
+
+        pre = self.xpath('pre[4]')
+        self.assertEqual('ç', pre.text)
+
+    def test_6(self):
+        """Test case: strsuffix (6)"""
+        self.start('Utf8/strsuffixs')
+
+        pre = self.xpath('pre[1]')
+        self.assertEqual('abàç', pre.text)
+
+        pre = self.xpath('pre[2]')
+        self.assertEqual('bàç', pre.text)
+        
+        pre = self.xpath('pre[3]')
+        self.assertEqual('àç', pre.text)
+        
+        pre = self.xpath('pre[4]')
+        self.assertEqual('ç', pre.text)
+
+    def test_7(self):
+        """Test case: strchr (7)"""
+        self.start('Utf8/strchrs')
+
+        pre = self.xpath('pre[1]')
+        self.assertEqual('None', pre.text)
+
+        pre = self.xpath('pre[2]')
+        self.assertEqual('Some "bàç"', pre.text)
+        
+        pre = self.xpath('pre[3]')
+        self.assertEqual('Some "àç"', pre.text)
+
+        pre = self.xpath('pre[4]')
+        self.assertEqual('Some "ç"', pre.text)
+
+        pre = self.xpath('pre[5]')
+        self.assertEqual('Some ""', pre.text)
+        
+    def test_8(self):
+        """Test case: strindex (8)"""
+        self.start('Utf8/strindexs')
+        
+        pre = self.xpath('pre[1]')
+        self.assertEqual('None', pre.text)
+
+        pre = self.xpath('pre[2]')
+        self.assertEqual('Some 0', pre.text)
+        
+        pre = self.xpath('pre[3]')
+        self.assertEqual('Some 1', pre.text)
+
+        pre = self.xpath('pre[4]')
+        self.assertEqual('Some 2', pre.text)
+
+        pre = self.xpath('pre[5]')
+        self.assertEqual('Some 3', pre.text)
+
+    def test_9(self):
+        """Test case: strsindex (9)"""
+        self.start('Utf8/strsindexs')
+
+        pre = self.xpath('pre[1]')
+        # behavior of strstr C function
+        self.assertEqual('Some 0', pre.text)
+        
+        pre = self.xpath('pre[2]')
+        self.assertEqual('Some 0', pre.text)
+        
+        pre = self.xpath('pre[3]')
+        self.assertEqual('None', pre.text)
+        
+        pre = self.xpath('pre[4]')
+        self.assertEqual('Some 1', pre.text)
+
+        pre = self.xpath('pre[5]')
+        self.assertEqual('None', pre.text)
+        
+        pre = self.xpath('pre[6]')
+        self.assertEqual('Some 2', pre.text)
+
+        pre = self.xpath('pre[7]')
+        self.assertEqual('None', pre.text)
+        
+        pre = self.xpath('pre[8]')
+        self.assertEqual('None', pre.text)
+
+        pre = self.xpath('pre[9]')
+        self.assertEqual('Some 3', pre.text)
+
+    def test_10(self):
+        """Test case: strcspn (10)"""
+        self.start('Utf8/strcspns')
+
+        pre = self.xpath('pre[1]')
+        self.assertEqual('4', pre.text)
+
+        pre = self.xpath('pre[2]')
+        self.assertEqual('0', pre.text)
+        
+        pre = self.xpath('pre[3]')
+        self.assertEqual('0', pre.text)
+        
+        pre = self.xpath('pre[4]')
+        self.assertEqual('1', pre.text)
+
+        pre = self.xpath('pre[5]')
+        self.assertEqual('2', pre.text)
+
+        pre = self.xpath('pre[6]')
+        self.assertEqual('3', pre.text)
+
+    def test_11(self):
+        """Test case: str1 (11)"""
+        self.start('Utf8/str1s')
+
+        pre = self.xpath('pre[1]')
+        self.assertEqual('a', pre.text)
+
+        pre = self.xpath('pre[2]')
+        self.assertEqual('à', pre.text)
+
+        pre = self.xpath('pre[3]')
+        self.assertEqual('á', pre.text)
+
+    def test_12(self):
+        """Test case: isalnum (12)"""
+        self.start('Utf8/isalnums')
+                               
+        for idx in range(1, 9):
+            pre = self.xpath('pre[' + str(idx) + ']')
+            self.assertEqual('True', pre.text, 'Failed isalnum: assert ' + str(idx))
+        
+    def test_13(self):
+        """Test case: isalpha (13)"""
+        self.start('Utf8/isalphas')
+                       
+        for idx in range(1, 9):
+            pre = self.xpath('pre[' + str(idx) + ']')
+            self.assertEqual('True', pre.text, 'Failed isalpha: assert ' + str(idx))
+        
+    def test_14(self):
+        """Test case: isblank (14)"""
+        self.start('Utf8/isblanks')
+               
+        for idx in range(1, 11):
+            pre = self.xpath('pre[' + str(idx) + ']')
+            self.assertEqual('True', pre.text, 'Failed isblank: assert ' + str(idx))
+
+    def test_15(self):
+        """Test case: iscntrl (15)"""
+        self.start('Utf8/iscntrls')
+               
+        for idx in range(1, 11):
+            pre = self.xpath('pre[' + str(idx) + ']')
+            self.assertEqual('True', pre.text, 'Failed iscntrl: assert ' + str(idx))
+        
+    def test_16(self):
+        """Test case: isdigit (16)"""
+        self.start('Utf8/isdigits')
+               
+        for idx in range(1, 11):
+            pre = self.xpath('pre[' + str(idx) + ']')
+            self.assertEqual('True', pre.text, 'Failed isdigit: assert ' + str(idx))
+
+        
+    def test_17(self):
+        """Test case: isgraph (17)"""
+        self.start('Utf8/isgraphs')
+        
+        for idx in range(1, 11):
+            pre = self.xpath('pre[' + str(idx) + ']')
+            self.assertEqual('True', pre.text, 'Failed isgraph: assert ' + str(idx))
+    
+    def test_18(self):
+        """Test case: islower (18)"""
+        self.start('Utf8/islowers')
+        
+        for idx in range(1, 11):
+            pre = self.xpath('pre[' + str(idx) + ']')
+            self.assertEqual('True', pre.text, 'Failed islower: assert ' + str(idx))
+        
+    def test_19(self):
+        """Test case: isprint (19)"""
+        self.start('Utf8/isprints')
+
+        for idx in range(1, 11):
+            pre = self.xpath('pre[' + str(idx) + ']')
+            self.assertEqual('True', pre.text, 'Failed isprint: assert ' + str(idx))
+        
+    def test_20(self):
+        """Test case: ispunct (20)"""
+        self.start('Utf8/ispuncts')
+
+        for idx in range(1, 11):
+            pre = self.xpath('pre[' + str(idx) + ']')
+            self.assertEqual('True', pre.text, 'Failed ispunct: assert ' + str(idx))
+        
+    def test_21(self):
+        """Test case: isspace (21)"""
+        self.start('Utf8/isspaces')
+
+        for idx in range(1, 11):
+            pre = self.xpath('pre[' + str(idx) + ']')
+            self.assertEqual('True', pre.text, 'Failed isspace: assert ' + str(idx))
+
+    def test_22(self):
+        """Test case: isupper (22)"""
+        self.start('Utf8/isuppers')
+
+        for idx in range(1, 11):
+            pre = self.xpath('pre[' + str(idx) + ']')
+            self.assertEqual('True', pre.text, 'Failed isupper: assert ' + str(idx))
+
+    def test_23(self):
+        """Test case: isxdigit (23)"""
+        self.start('Utf8/isxdigits')
+
+        for idx in range(1, 11):
+            pre = self.xpath('pre[' + str(idx) + ']')
+            self.assertEqual('True', pre.text, 'Failed isxdigit: assert ' + str(idx))
+
+    def test_24(self):
+        """Test case: toupper (24)"""
+        self.start('Utf8/touppers')
+
+        for idx in range(1, 6):
+            pre = self.xpath('pre[' + str(idx) + ']')
+            self.assertEqual('True', pre.text, 'Failed toupper: assert ' + str(idx))
+
+    def test_25(self):
+        """Test case: ord (25)"""
+        self.start('Utf8/ord_and_chrs')
+
+        for idx in range(1, 8):
+            pre = self.xpath('pre[' + str(idx) + ']')
+            self.assertEqual('True', pre.text, 'Failed ord: assert ' + str(idx))
+
+    def test_26 (self):
+        """Test case: test_db (26) """
+        self.start('Utf8/test_db')
+
+        pre = self.xpath('pre[1]')
+        self.assertEqual('abc', pre.text)
+
+        pre = self.xpath('pre[2]')
+        self.assertEqual('3', pre.text)
+
+        pre = self.xpath('pre[3]')
+        self.assertEqual('çãó', pre.text)
+
+        pre = self.xpath('pre[4]')
+        self.assertEqual('3', pre.text)
+
+        pre = self.xpath('pre[5]')
+        self.assertEqual('が', pre.text)
+
+        pre = self.xpath('pre[6]')
+        self.assertEqual('1', pre.text)
+
+        pre = self.xpath('pre[7]')
+        self.assertEqual('漢', pre.text)
+
+        pre = self.xpath('pre[8]')
+        self.assertEqual('1', pre.text)
+
+        pre = self.xpath('pre[9]')
+        self.assertEqual('カ', pre.text)
+
+        pre = self.xpath('pre[10]')
+        self.assertEqual('1', pre.text)
+
+        pre = self.xpath('pre[11]')
+        self.assertEqual('وظيفية', pre.text)
+
+        pre = self.xpath('pre[12]')
+        self.assertEqual('6', pre.text)
diff --git a/tests/utf8.ur b/tests/utf8.ur
new file mode 100644
index 00000000..0dedc726
--- /dev/null
+++ b/tests/utf8.ur
@@ -0,0 +1,431 @@
+(* Renders substring results for ASCII, accented and empty strings, one result per <pre>. *)
+fun substrings () : transaction page = return <xml>
+  <body>
+    <pre>{[substring "abc" 0 3]}</pre>   
+    <pre>{[substring "abc" 1 2]}</pre>
+    <pre>{[substring "abc" 2 1]}</pre>
+    <pre>{[substring "ábó" 0 3]}</pre>    
+    <pre>{[substring "ábó" 1 2]}</pre>
+    <pre>{[substring "ábó" 2 1]}</pre>    
+    <pre>{[substring "çãó" 0 3]}</pre>
+    <pre>{[substring "çãó" 1 2]}</pre>
+    <pre>{[substring "çãó" 2 1]}</pre>
+    <pre>{[substring "çãó" 2 0]}</pre>
+    <pre>{[substring "" 0 0]}</pre>
+  </body>
+</xml>
+(* Renders strlen for empty, ASCII, accented Latin, Japanese, Chinese, Arabic and Cyrillic strings. *)
+fun strlens () : transaction page = return <xml>
+  <body>
+    <pre>{[strlen "abc"]}</pre>
+    <pre>{[strlen "çbc"]}</pre>
+    <pre>{[strlen "çãc"]}</pre>
+    <pre>{[strlen "çãó"]}</pre>
+    <pre>{[strlen "ç"]}</pre>
+    <pre>{[strlen "c"]}</pre>
+    <pre>{[strlen ""]}</pre>
+    <pre>{[strlen "が"]}</pre>
+    <pre>{[strlen "漢"]}</pre>
+    <pre>{[strlen "カ"]}</pre>
+    <pre>{[strlen "وظيفية"]}</pre>
+    <pre>{[strlen "函數"]}</pre>
+    <pre>{[strlen "Функциональное"]}</pre>
+  </body>
+  </xml>
+(* Renders strlenGe for empty, ASCII and accented strings with bounds above, at and below their length. *)
+fun strlenGens () : transaction page = return <xml>
+  <body>
+    <pre>{[strlenGe "" 1]}</pre>
+    <pre>{[strlenGe "" 0]}</pre>
+    <pre>{[strlenGe "aba" 4]}</pre>
+    <pre>{[strlenGe "aba" 3]}</pre>
+    <pre>{[strlenGe "aba" 2]}</pre>
+    <pre>{[strlenGe "áçà" 4]}</pre>
+    <pre>{[strlenGe "áçà" 3]}</pre>
+    <pre>{[strlenGe "áçà" 2]}</pre>
+    
+  </body>
+  </xml>
+(* Renders each strcat result followed by its strlen, for empty, ASCII and accented operands. *)
+fun strcats () : transaction page =
+    let
+	fun catAndLen a b =
+	    <xml>
+	      <pre>{[strcat a b]}</pre>
+	      <pre>{[strlen (strcat a b)]}</pre>
+	    </xml>
+    in
+	return <xml>
+	  <body>
+	    {catAndLen "" ""}
+	    {catAndLen "aa" "bb"}
+	    {catAndLen "" "bb"}
+	    {catAndLen "aa" ""}
+	    {catAndLen "àà" "áá"}
+	    {catAndLen "" "áá"}
+	    {catAndLen "àà" ""}	    
+	  </body>
+	</xml>
+end
+(* Renders strsub at indices 0..3 of a string mixing ASCII and accented letters. *)
+fun strsubs () : transaction page =
+    return <xml>
+      <body>
+	<pre>{[strsub "abàç" 0]}</pre>
+	<pre>{[strsub "abàç" 1]}</pre>
+	<pre>{[strsub "abàç" 2]}</pre>
+	<pre>{[strsub "abàç" 3]}</pre>
+      </body>
+      </xml>
+(* Renders strsuffix at offsets 0..3 of a string mixing ASCII and accented letters. *)
+fun strsuffixs () : transaction page =
+    return <xml>
+      <body>
+	<pre>{[strsuffix "abàç" 0]}</pre>
+	<pre>{[strsuffix "abàç" 1]}</pre>
+	<pre>{[strsuffix "abàç" 2]}</pre>
+	<pre>{[strsuffix "abàç" 3]}</pre>
+      </body>
+    </xml>
+(* Renders strchr results, shown as None or Some with the quoted string, for ASCII and accented search chars. *)
+fun strchrs () : transaction page =
+    let
+	fun optToStr ms =
+	    case ms of
+		None => "None"
+	      | Some s => "Some \"" ^ s ^ "\""
+
+    in
+	return <xml>
+	  <body>
+	    <pre>{[optToStr (strchr "abàç" #"c")]}</pre>
+	    <pre>{[optToStr (strchr "abàç" #"a")]}</pre>
+	    <pre>{[optToStr (strchr "abàç" #"b")]}</pre>
+	    <pre>{[optToStr (strchr "abàç" (strsub "à" 0))]}</pre>
+	    <pre>{[optToStr (strchr "abàç" (strsub "ç" 0))]}</pre>
+	  </body>
+	</xml>
+    end
+(* Renders strindex results, shown as None or Some followed by the value, for ASCII and accented search chars. *)
+fun strindexs () : transaction page =
+    let
+	fun optToStr ms =
+	    case ms of
+		None => "None"
+	      | Some s => "Some " ^ (show s)
+
+    in
+	return <xml>
+	  <body>
+	    <pre>{[optToStr (strindex "abàç" #"c")]}</pre>
+	    <pre>{[optToStr (strindex "abàç" #"a")]}</pre>
+	    <pre>{[optToStr (strindex "abàç" #"b")]}</pre>
+	    <pre>{[optToStr (strindex "abàç" (strsub "à" 0))]}</pre>
+	    <pre>{[optToStr (strindex "abàç" (strsub "ç" 0))]}</pre>
+	  </body>
+	</xml>
+    end
+(* Renders strsindex results, shown as None or Some followed by the value, for several needle strings. *)
+fun strsindexs () : transaction page =
+    let
+	fun optToStr ms =
+	    case ms of
+		None => "None"
+	      | Some s => "Some " ^ (show s)
+
+    in
+	return <xml>
+	  <body>
+	    <pre>{[optToStr (strsindex "abàç" "")]}</pre>
+	    <pre>{[optToStr (strsindex "abàç" "abàç")]}</pre>
+	    <pre>{[optToStr (strsindex "abàç" "abàc")]}</pre>
+	    <pre>{[optToStr (strsindex "abàç" "bàç")]}</pre>
+	    <pre>{[optToStr (strsindex "abàç" "bàc")]}</pre>
+	    <pre>{[optToStr (strsindex "abàç" "àç")]}</pre>
+	    <pre>{[optToStr (strsindex "abàç" "àc")]}</pre>
+	    <pre>{[optToStr (strsindex "abàç" "ac")]}</pre>
+	    <pre>{[optToStr (strsindex "abàç" "ç")]}</pre>
+	  </body>
+	</xml>
+    end
+(* Renders strcspn of one mixed string against empty, ASCII, mixed and accented second arguments. *)
+fun strcspns () : transaction page =
+    return <xml>
+      <body>
+	<pre>{[strcspn "abàç" ""]}</pre>
+	<pre>{[strcspn "abàç" "abàç"]}</pre>
+	<pre>{[strcspn "abàç" "a"]}</pre>
+	<pre>{[strcspn "abàç" "bàç"]}</pre>
+	<pre>{[strcspn "abàç" "àç"]}</pre>
+	<pre>{[strcspn "abàç" "ç"]}</pre>
+      </body>
+      </xml>
+(* Renders str1 applied to an ASCII char literal and to accented chars obtained through strsub. *)
+fun str1s () : transaction page = return <xml>
+  <body>
+    <pre>{[str1 #"a"]}</pre>
+    <pre>{[str1 (strsub "à" 0)]}</pre>
+    <pre>{[str1 (strsub "aá" 1)]}</pre>
+  </body>
+  </xml>
+(* Renders isalnum checks on letters (ASCII and accented) and a digit, negated for punctuation and space. *)
+fun isalnums () : transaction page = return <xml>
+  <body>
+    <pre>{[isalnum #"a"]}</pre>
+    <pre>{[isalnum (strsub "à" 0)]}</pre>
+    <pre>{[isalnum #"A"]}</pre>
+    <pre>{[isalnum (strsub "À" 0)]}</pre>
+    <pre>{[isalnum #"1"]}</pre>
+    <pre>{[not (isalnum #"!")]}</pre>
+    <pre>{[not (isalnum #"#")]}</pre>
+    <pre>{[not (isalnum #" ")]}</pre>
+  </body>
+</xml>
+(* Renders isalpha checks on letters (ASCII and accented), negated for a digit, punctuation and space. *)
+fun isalphas () : transaction page = return <xml>
+  <body>
+    <pre>{[isalpha #"a"]}</pre>
+    <pre>{[isalpha (strsub "à" 0)]}</pre>
+    <pre>{[isalpha #"A"]}</pre>
+    <pre>{[isalpha (strsub "À" 0)]}</pre>
+    <pre>{[not (isalpha #"1")]}</pre>
+    <pre>{[not (isalpha #"!")]}</pre>
+    <pre>{[not (isalpha #"#")]}</pre>
+    <pre>{[not (isalpha #" ")]}</pre>
+  </body>
+</xml>
+(* Renders isblank checks: direct for space and tab, negated for letters, digit, punctuation and newline. *)
+fun isblanks () : transaction page = 
+    return <xml>
+      <body>
+	<pre>{[not (isblank #"a")]}</pre>
+	<pre>{[not (isblank (strsub "à" 0))]}</pre>
+	<pre>{[not (isblank #"A")]}</pre>
+	<pre>{[not (isblank (strsub "À" 0))]}</pre>
+	<pre>{[not (isblank #"1")]}</pre>
+	<pre>{[not (isblank #"!")]}</pre>
+	<pre>{[not (isblank #"#")]}</pre>
+	<pre>{[isblank #" "]}</pre>
+	<pre>{[isblank #"\t"]}</pre>
+	<pre>{[not (isblank #"\n")]}</pre>
+      </body>
+    </xml>
+(* Renders iscntrl checks: direct for tab and newline, negated for letters, digit, punctuation and space. *)
+fun iscntrls () : transaction page =
+    return <xml>
+      <body>
+	<pre>{[not (iscntrl #"a")]}</pre>
+	<pre>{[not (iscntrl (strsub "à" 0))]}</pre>
+	<pre>{[not (iscntrl #"A")]}</pre>
+	<pre>{[not (iscntrl (strsub "À" 0))]}</pre>
+	<pre>{[not (iscntrl #"1")]}</pre>
+	<pre>{[not (iscntrl #"!")]}</pre>
+	<pre>{[not (iscntrl #"#")]}</pre>
+	<pre>{[not (iscntrl #" ")]}</pre>
+	<pre>{[iscntrl #"\t"]}</pre>
+	<pre>{[iscntrl #"\n"]}</pre>
+      </body>
+      </xml>
+(* Renders isdigit checks: direct for the digit 1, negated for letters, punctuation and whitespace. *)
+fun isdigits () : transaction page =
+    return <xml>
+      <body>
+	<pre>{[not (isdigit #"a")]}</pre>
+	<pre>{[not (isdigit (strsub "à" 0))]}</pre>
+	<pre>{[not (isdigit #"A")]}</pre>
+	<pre>{[not (isdigit (strsub "À" 0))]}</pre>
+	<pre>{[isdigit #"1"]}</pre>
+	<pre>{[not (isdigit #"!")]}</pre>
+	<pre>{[not (isdigit #"#")]}</pre>
+	<pre>{[not (isdigit #" ")]}</pre>
+	<pre>{[not (isdigit #"\t")]}</pre>
+	<pre>{[not (isdigit #"\n")]}</pre>
+      </body>
+      </xml>
+(* Renders isgraph checks: direct for letters, digit and punctuation, negated for space, tab and newline. *)
+fun isgraphs () : transaction page =
+    return <xml>
+      <body>
+	<pre>{[isgraph #"a"]}</pre>
+	<pre>{[isgraph (strsub "à" 0)]}</pre>
+	<pre>{[isgraph #"A"]}</pre>
+	<pre>{[isgraph (strsub "À" 0)]}</pre>
+	<pre>{[isgraph #"1"]}</pre>
+	<pre>{[isgraph #"!"]}</pre>
+	<pre>{[isgraph #"#"]}</pre>
+	<pre>{[not (isgraph #" ")]}</pre>
+	<pre>{[not (isgraph #"\t")]}</pre>
+	<pre>{[not (isgraph #"\n")]}</pre>
+      </body>
+      </xml>
+(* Renders islower checks: direct for lowercase letters, negated for uppercase, digit, punctuation and whitespace. *)
+fun islowers () : transaction page =
+    return <xml>
+      <body>
+	<pre>{[islower #"a"]}</pre>
+	<pre>{[islower (strsub "à" 0)]}</pre>
+	<pre>{[not (islower #"A")]}</pre>
+	<pre>{[not (islower (strsub "À" 0))]}</pre>
+	<pre>{[not (islower #"1")]}</pre>
+	<pre>{[not (islower #"!")]}</pre>
+	<pre>{[not (islower #"#")]}</pre>
+	<pre>{[not (islower #" ")]}</pre>
+	<pre>{[not (islower #"\t")]}</pre>
+	<pre>{[not (islower #"\n")]}</pre>
+      </body>
+      </xml>
+(* Renders isprint checks: direct for letters, digit, punctuation and space, negated for tab and newline. *)
+fun isprints () : transaction page =
+    return <xml>
+      <body>
+	<pre>{[isprint #"a"]}</pre>
+	<pre>{[isprint (strsub "à" 0)]}</pre>
+	<pre>{[isprint #"A"]}</pre>
+	<pre>{[isprint (strsub "À" 0)]}</pre>
+	<pre>{[isprint #"1"]}</pre>
+	<pre>{[isprint #"!"]}</pre>
+	<pre>{[isprint #"#"]}</pre>
+	<pre>{[isprint #" "]}</pre>
+	<pre>{[not (isprint #"\t")]}</pre>
+	<pre>{[not (isprint #"\n")]}</pre>
+      </body>
+      </xml>
+(* Renders ispunct checks: direct for ! and #, negated for letters, digit, space, tab and newline. *)
+fun ispuncts () : transaction page =
+    return <xml>
+      <body>
+	<pre>{[not (ispunct #"a")]}</pre>
+	<pre>{[not (ispunct (strsub "à" 0))]}</pre>
+	<pre>{[not (ispunct #"A")]}</pre>
+	<pre>{[not (ispunct (strsub "À" 0))]}</pre>
+	<pre>{[not (ispunct #"1")]}</pre>
+	<pre>{[ispunct #"!"]}</pre>
+	<pre>{[ispunct #"#"]}</pre>
+	<pre>{[not (ispunct #" ")]}</pre>
+	<pre>{[not (ispunct #"\t")]}</pre>
+	<pre>{[not (ispunct #"\n")]}</pre>
+      </body>
+      </xml>
+(* Renders isspace checks: direct for space, tab and newline, negated for letters, digit and punctuation. *)
+fun isspaces () : transaction page =
+    return <xml>
+      <body>
+	<pre>{[not (isspace #"a")]}</pre>
+	<pre>{[not (isspace (strsub "à" 0))]}</pre>
+	<pre>{[not (isspace #"A")]}</pre>
+	<pre>{[not (isspace (strsub "À" 0))]}</pre>
+	<pre>{[not (isspace #"1")]}</pre>
+	<pre>{[not (isspace #"!")]}</pre>
+	<pre>{[not (isspace #"#")]}</pre>
+	<pre>{[isspace #" "]}</pre>
+	<pre>{[isspace #"\t"]}</pre>
+	<pre>{[isspace #"\n"]}</pre>
+      </body>
+    </xml>
+(* Renders isupper checks: direct for uppercase letters, negated for lowercase, digit, punctuation and whitespace. *)
+fun isuppers () : transaction page =
+    return <xml>
+      <body>
+	<pre>{[not (isupper #"a")]}</pre>
+	<pre>{[not (isupper (strsub "à" 0))]}</pre>
+	<pre>{[isupper #"A"]}</pre>
+	<pre>{[isupper (strsub "À" 0)]}</pre>
+	<pre>{[not (isupper #"1")]}</pre>
+	<pre>{[not (isupper #"!")]}</pre>
+	<pre>{[not (isupper #"#")]}</pre>
+	<pre>{[not (isupper #" ")]}</pre>
+	<pre>{[not (isupper #"\t")]}</pre>
+	<pre>{[not (isupper #"\n")]}</pre>
+      </body>
+      </xml>
+(* Renders isxdigit checks: direct for a, A and 1, negated for accented letters, punctuation and whitespace. *)
+fun isxdigits () : transaction page =
+    return <xml>
+      <body>
+	<pre>{[isxdigit #"a"]}</pre>
+	<pre>{[not (isxdigit (strsub "à" 0))]}</pre>
+	<pre>{[isxdigit #"A"]}</pre>
+	<pre>{[not (isxdigit (strsub "À" 0))]}</pre>
+	<pre>{[isxdigit #"1"]}</pre>
+	<pre>{[not (isxdigit #"!")]}</pre>
+	<pre>{[not (isxdigit #"#")]}</pre>
+	<pre>{[not (isxdigit #" ")]}</pre>
+	<pre>{[not (isxdigit #"\t")]}</pre>
+	<pre>{[not (isxdigit #"\n")]}</pre>
+      </body>
+      </xml>
+(* Renders equality checks of tolower results for ASCII letters, accented letters and a digit. *)
+fun tolowers () : transaction page =
+    return <xml>
+      <body>
+	<pre>{[tolower #"A" = #"a"]}</pre>
+	<pre>{[tolower #"a" = #"a"]}</pre>
+	<pre>{[tolower (strsub "á" 0) = (strsub "á" 0)]}</pre>
+	<pre>{[tolower (strsub "Á" 0) = (strsub "á" 0)]}</pre>
+	<pre>{[tolower #"1" = #"1"]}</pre>
+      </body>
+    </xml>
+(* Renders equality checks of toupper results for ASCII letters, accented letters and a digit. *)
+fun touppers () : transaction page =
+    return <xml>
+      <body>
+	<pre>{[toupper #"A" = #"A"]}</pre>
+	<pre>{[toupper #"a" = #"A"]}</pre>
+	<pre>{[toupper (strsub "á" 0) = (strsub "Á" 0)]}</pre>
+	<pre>{[toupper (strsub "Á" 0) = (strsub "Á" 0)]}</pre>
+	<pre>{[toupper #"1" = #"1"]}</pre>
+      </body>
+      </xml>
+(* Renders nine checks that chr (ord c) equals c, for ASCII, accented, newline and Japanese/Chinese chars. *)
+fun ord_and_chrs () : transaction page =
+    return <xml>
+      <body>
+	<pre>{[chr (ord #"A") = #"A"]}</pre>
+	<pre>{[chr (ord #"a") = #"a"]}</pre>
+	<pre>{[chr (ord (strsub "á" 0)) = (strsub "á" 0)]}</pre>
+	<pre>{[chr (ord (strsub "Á" 0)) = (strsub "Á" 0)]}</pre>
+	<pre>{[chr (ord #"1") = #"1"]}</pre>
+	<pre>{[chr (ord #"\n") = #"\n"]}</pre>
+	<pre>{[chr (ord (strsub "が" 0)) = (strsub "が" 0)]}</pre>
+	<pre>{[chr (ord (strsub "漢" 0)) = (strsub "漢" 0)]}</pre>
+	<pre>{[chr (ord (strsub "カ" 0)) = (strsub "カ" 0)]}</pre>
+      </body>
+      </xml>
+table t : { Id : int, Text : string }
+
+(* Inserts six rows into t (ids 1..6), reads each Text back by id, and renders every text followed by its strlen. *)
+fun test_db () : transaction page =
+    dml (INSERT INTO t (Id, Text) VALUES({[1]}, {["abc"]}));
+    t1 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 1);
+
+    dml (INSERT INTO t (Id, Text) VALUES({[2]}, {["çãó"]}));
+    t2 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 2);
+
+    dml (INSERT INTO t (Id, Text) VALUES({[3]}, {["が"]}));
+    t3 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 3);
+
+    dml (INSERT INTO t (Id, Text) VALUES({[4]}, {["漢"]}));
+    t4 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 4);
+
+    dml (INSERT INTO t (Id, Text) VALUES({[5]}, {["カ"]}));
+    t5 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 5);
+
+    dml (INSERT INTO t (Id, Text) VALUES({[6]}, {["وظيفية"]}));
+    t6 <- oneRow (SELECT t.Text FROM t WHERE t.Id = 6);
+
+    return <xml>
+      <body>
+	<pre>{[t1.T.Text]}</pre>
+	<pre>{[strlen t1.T.Text]}</pre>
+	<pre>{[t2.T.Text]}</pre>
+	<pre>{[strlen t2.T.Text]}</pre>
+	<pre>{[t3.T.Text]}</pre>
+	<pre>{[strlen t3.T.Text]}</pre>
+	<pre>{[t4.T.Text]}</pre>
+	<pre>{[strlen t4.T.Text]}</pre>
+	<pre>{[t5.T.Text]}</pre>
+	<pre>{[strlen t5.T.Text]}</pre>
+	<pre>{[t6.T.Text]}</pre>
+	<pre>{[strlen t6.T.Text]}</pre>
+      </body>
+      </xml>
diff --git a/tests/utf8.urp b/tests/utf8.urp
new file mode 100644
index 00000000..9b3067af
--- /dev/null
+++ b/tests/utf8.urp
@@ -0,0 +1,5 @@
+database dbname=utf8
+sql utf8.sql
+safeGet Utf8/test_db
+
+utf8
+\ No newline at end of file