diff options
-rw-r--r-- | include/urweb/urweb_cpp.h | 5 | ||||
-rw-r--r-- | lib/js/urweb.js | 7 | ||||
-rw-r--r-- | lib/ur/basis.urs | 2 | ||||
-rw-r--r-- | src/c/urweb.c | 265 |
4 files changed, 168 insertions, 111 deletions
diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h index 5f1144b8..25f97fb3 100644 --- a/include/urweb/urweb_cpp.h +++ b/include/urweb/urweb_cpp.h @@ -103,7 +103,7 @@ char *uw_Basis_htmlifyFloat(struct uw_context *, uw_Basis_float); char *uw_Basis_htmlifyString(struct uw_context *, uw_Basis_string); char *uw_Basis_htmlifyBool(struct uw_context *, uw_Basis_bool); char *uw_Basis_htmlifyTime(struct uw_context *, uw_Basis_time); -char *uw_Basis_htmlifySpecialChar(struct uw_context *, unsigned char); +char *uw_Basis_htmlifySpecialChar(struct uw_context *, uw_Basis_char); char *uw_Basis_htmlifySource(struct uw_context *, uw_Basis_source); uw_unit uw_Basis_htmlifyInt_w(struct uw_context *, uw_Basis_int); @@ -111,7 +111,7 @@ uw_unit uw_Basis_htmlifyFloat_w(struct uw_context *, uw_Basis_float); uw_unit uw_Basis_htmlifyString_w(struct uw_context *, uw_Basis_string); uw_unit uw_Basis_htmlifyBool_w(struct uw_context *, uw_Basis_bool); uw_unit uw_Basis_htmlifyTime_w(struct uw_context *, uw_Basis_time); -uw_unit uw_Basis_htmlifySpecialChar_w(struct uw_context *, unsigned char); +uw_unit uw_Basis_htmlifySpecialChar_w(struct uw_context *, uw_Basis_char); uw_unit uw_Basis_htmlifySource_w(struct uw_context *, uw_Basis_source); char *uw_Basis_attrifyInt(struct uw_context *, uw_Basis_int); @@ -327,6 +327,7 @@ uw_Basis_bool uw_Basis_isxdigit(struct uw_context *, uw_Basis_char); uw_Basis_char uw_Basis_tolower(struct uw_context *, uw_Basis_char); uw_Basis_char uw_Basis_toupper(struct uw_context *, uw_Basis_char); +uw_Basis_bool uw_Basis_iscodepoint(struct uw_context *, uw_Basis_int); uw_Basis_int uw_Basis_ord(struct uw_context *, uw_Basis_char); uw_Basis_char uw_Basis_chr(struct uw_context *, uw_Basis_int); diff --git a/lib/js/urweb.js b/lib/js/urweb.js index de1a2ad0..c7725e28 100644 --- a/lib/js/urweb.js +++ b/lib/js/urweb.js @@ -38,7 +38,12 @@ function isXdigit(c) { return isDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' function ord(c) { return c.charCodeAt(0); } function isPrint(c) { return ord(c) > 31 && ord(c) != 127; } function toLower(c) { return c.toLowerCase(); } -function toUpper(c) { return c.toUpperCase(); } +function toUpper(c) { + if (ord(c) == 223) + return c; + else + return c.toUpperCase(); +} // Lists diff --git a/lib/ur/basis.urs b/lib/ur/basis.urs index 878f2793..c9d6556b 100644 --- a/lib/ur/basis.urs +++ b/lib/ur/basis.urs @@ -79,6 +79,8 @@ val toupper : char -> char val ord : char -> int val chr : int -> char +val iscodepoint : int -> bool + (** String operations *) val strlen : string -> int diff --git a/src/c/urweb.c b/src/c/urweb.c index be65afcc..195ddada 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -1559,101 +1559,89 @@ const char *uw_Basis_get_settings(uw_context ctx, uw_unit u) { } } -uw_Basis_string uw_Basis_jsifyString(uw_context ctx, uw_Basis_string s) { - char *r, *s2; - - uw_check_heap(ctx, strlen(s) * 4 + 3); - - r = s2 = ctx->heap.front; - *s2++ = '"'; - - for (; *s; s++) { - unsigned char c = *s; - - switch (c) { - case '"': - strcpy(s2, "\\\""); - s2 += 2; - break; - case '\'': - strcpy(s2, "\\047"); - s2 += 4; - break; - case '\\': - strcpy(s2, "\\\\"); - s2 += 2; - break; - case '<': - strcpy(s2, "\\074"); - s2 += 4; - break; - case '&': - strcpy(s2, "\\046"); - s2 += 4; - break; - default: - if (isprint((int)c) || c >= 128) - *s2++ = c; - else { - sprintf(s2, "\\%03o", c); - s2 += 4; - } - } - } - - strcpy(s2, "\""); - ctx->heap.front = s2 + 2; - return r; -} - uw_Basis_bool uw_Basis_isprint(uw_context ctx, uw_Basis_char ch); - -uw_Basis_string uw_Basis_jsifyChar(uw_context ctx, uw_Basis_char c1) { - char *r, *s2; - - uw_check_heap(ctx, 7); - - r = s2 = ctx->heap.front; - *s2++ = '"'; - +void jsifyChar(char**buffer_ptr, uw_context ctx, uw_Basis_char c1) { + char* buffer = *buffer_ptr; + switch (c1) { case '"': - strcpy(s2, "\\\""); - s2 += 2; + strcpy(buffer, "\\\""); + buffer += 2; break; case '\'': - strcpy(s2, "\\047"); - s2 += 4; + strcpy(buffer, "\\047"); + buffer += 4; break; case '\\': - strcpy(s2, "\\\\"); - s2 += 2; + strcpy(buffer, "\\\\"); + buffer += 2; break; case '<': - strcpy(s2, "\\074"); - s2 += 4; + strcpy(buffer, "\\074"); + buffer += 4; break; case '&': - strcpy(s2, "\\046"); - s2 += 4; + strcpy(buffer, "\\046"); + buffer += 4; break; default: if (uw_Basis_isprint(ctx, c1) == uw_Basis_True) { int offset = 0; - U8_APPEND_UNSAFE(s2, offset, c1); - s2 += offset; + U8_APPEND_UNSAFE(buffer, offset, c1); + buffer += offset; } else { - assert(0777 >= c1); - sprintf(s2, "\\%03o", (unsigned char)c1); - s2 += 4; + assert(65536 > c1); + sprintf(buffer, "\\u%04x", (unsigned char)c1); + buffer += 6; } } + + *buffer_ptr = buffer; +} + +uw_Basis_string uw_Basis_jsifyString(uw_context ctx, uw_Basis_string s) { + char *r, *s2; + uw_Basis_char c; + + uw_check_heap(ctx, strlen(s) * 6 + 3); + + r = s2 = ctx->heap.front; + *s2++ = '"'; + + int offset = 0; + while(s[offset] != 0) + { + U8_NEXT(s, offset, -1, c); + + jsifyChar(&s2, ctx, c); + } + strcpy(s2, "\""); ctx->heap.front = s2 + 2; + + return r; +} + +uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c); + +uw_Basis_string uw_Basis_jsifyChar(uw_context ctx, uw_Basis_char c1) { + char *r, *s2; + + uw_check_heap(ctx, 8); + + r = s2 = ctx->heap.front; + + *s2++ = '"'; + + jsifyChar(&s2, ctx, c1); + + strcpy(s2, "\""); + ctx->heap.front = s2 + 2; + return r; } @@ -1697,6 +1685,7 @@ uw_Basis_string uw_Basis_jsifyString_ws(uw_context ctx, uw_Basis_string s) { strcpy(s2, "\""); ctx->script.front = s2 + 1; + return r; } @@ -2262,25 +2251,27 @@ uw_unit uw_Basis_htmlifyInt_w(uw_context ctx, uw_Basis_int n) { return uw_unit_v; } -char *uw_Basis_htmlifySpecialChar(uw_context ctx, unsigned char ch) { +char *uw_Basis_htmlifySpecialChar(uw_context ctx, uw_Basis_char ch) { unsigned int n = ch; int len; char *r; - uw_check_heap(ctx, INTS_MAX+3); + uw_check_heap(ctx, INTS_MAX+3 + 1); r = ctx->heap.front; - sprintf(r, "&#%u;%n", n, &len); + len = sprintf(r, "&#%u;", n); ctx->heap.front += len+1; + return r; } -uw_unit uw_Basis_htmlifySpecialChar_w(uw_context ctx, unsigned char ch) { +uw_unit uw_Basis_htmlifySpecialChar_w(uw_context ctx, uw_Basis_char ch) { unsigned int n = ch; int len; uw_check(ctx, INTS_MAX+3); - sprintf(ctx->page.front, "&#%u;%n", n, &len); + len = sprintf(ctx->page.front, "&#%u;", n); ctx->page.front += len; + return uw_unit_v; } @@ -2328,48 +2319,69 @@ uw_unit uw_Basis_jsifyInt_w(uw_context ctx, uw_Basis_int n) { char *uw_Basis_htmlifyString(uw_context ctx, const char *s) { char *r, *s2; + uw_Basis_char c1; + int offset = 0, len = 0; + + uw_check_heap(ctx, strlen(s) * (INTS_MAX + 3) + 1); - uw_check_heap(ctx, strlen(s) * 5 + 1); - - for (r = s2 = ctx->heap.front; *s; s++) { - unsigned char c = *s; - - switch (c) { - case '<': - strcpy(s2, "<"); - s2 += 4; - break; - case '&': - strcpy(s2, "&"); - s2 += 5; - break; - default: - *s2++ = c; + r = s2 = ctx->heap.front; + + while (s[offset] != 0) { + + U8_NEXT(s, offset, -1, c1); + + + if (U8_IS_SINGLE(c1) && uw_Basis_isprint(ctx, c1)) { + switch (c1) { + case '<': + strcpy(s2, "<"); + s2 += 4; + break; + case '&': + strcpy(s2, "&"); + s2 += 5; + break; + default: + *s2++ = c1; + } + } else { + len = sprintf(s2, "&#%u;", c1); + s2 += len; } } - + *s2++ = 0; ctx->heap.front = s2; + return r; } uw_unit uw_Basis_htmlifyString_w(uw_context ctx, uw_Basis_string s) { uw_check(ctx, strlen(s) * 6); - - for (; *s; s++) { - unsigned char c = *s; - - switch (c) { - case '<': - uw_write_unsafe(ctx, "<"); - break; - case '&': - uw_write_unsafe(ctx, "&"); - break; - default: - uw_writec_unsafe(ctx, c); + int offset = 0; + uw_Basis_char c1; + + while(s[offset] != 0){ + + U8_NEXT(s, offset, -1, c1); + + if (U8_IS_SINGLE(c1) && uw_Basis_isprint(ctx, c1)) { + + switch (c1) { + case '<': + uw_write_unsafe(ctx, "<"); + break; + case '&': + uw_write_unsafe(ctx, "&"); + break; + default: + uw_writec_unsafe(ctx, c1); + } } - } + else { + uw_Basis_htmlifySpecialChar_w(ctx, c1); + } + } return uw_unit_v; } @@ -4474,9 +4486,46 @@ uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c) { return (uw_Basis_int)c; } +uw_Basis_bool uw_Basis_iscodepoint (uw_context ctx, uw_Basis_int n) { + (void)ctx; + uw_Basis_char ch = (uw_Basis_char)n; + + if (UCHAR_MIN_VALUE <= ch && UCHAR_MAX_VALUE > ch) { + + if (U8_LENGTH(ch) == 0) { + return uw_Basis_False; + } + + if (u_charType(ch) == U_UNASSIGNED) { + return uw_Basis_False; + } + + } else { + return uw_Basis_False; + } + + return uw_Basis_True; +} + uw_Basis_char uw_Basis_chr(uw_context ctx, uw_Basis_int n) { (void)ctx; - return (uw_Basis_char)n; + uw_Basis_char ch = (uw_Basis_char)n; + + if (UCHAR_MIN_VALUE <= ch && UCHAR_MAX_VALUE > ch) { + + if (U8_LENGTH(ch) == 0) { + uw_error(ctx, FATAL, "The integer %lld cannot be converted to a char", n); + } + + if (u_charType(ch) == U_UNASSIGNED) { + uw_error(ctx, FATAL, "The integer %lld is not a valid char codepoint", n); + } + + } else { + uw_error(ctx, FATAL, "Integer %lld out of range of unicode chars", n); + } + + return ch; } uw_Basis_string uw_Basis_currentUrl(uw_context ctx) { |