diff options
Diffstat (limited to 'src/c')
-rw-r--r-- | src/c/Makefile.am | 21 | ||||
-rw-r--r-- | src/c/http.c | 24 | ||||
-rw-r--r-- | src/c/memmem.c | 15 | ||||
-rw-r--r-- | src/c/memmem.h | 23 | ||||
-rw-r--r-- | src/c/request.c | 5 | ||||
-rw-r--r-- | src/c/urweb.c | 646 |
6 files changed, 505 insertions, 229 deletions
diff --git a/src/c/Makefile.am b/src/c/Makefile.am index 58f5153c..ff4b6eaf 100644 --- a/src/c/Makefile.am +++ b/src/c/Makefile.am @@ -1,21 +1,26 @@ lib_LTLIBRARIES = liburweb.la liburweb_http.la liburweb_cgi.la liburweb_fastcgi.la liburweb_static.la -liburweb_la_SOURCES = memmem.c openssl.c urweb.c request.c queue.c +liburweb_la_SOURCES = memmem.c memmem.h openssl.c urweb.c request.c queue.c liburweb_http_la_SOURCES = http.c liburweb_cgi_la_SOURCES = cgi.c liburweb_fastcgi_la_SOURCES = fastcgi.c fastcgi.h liburweb_static_la_SOURCES = static.c -AM_CPPFLAGS = -I$(srcdir)/../../include/urweb $(OPENSSL_INCLUDES) +AM_CPPFLAGS = -I$(srcdir)/../../include/urweb $(OPENSSL_INCLUDES) $(ICU_INCLUDES) AM_CFLAGS = -Wall -Wunused-parameter -Werror -Wno-format-security -Wno-deprecated-declarations -U_FORTIFY_SOURCE $(PTHREAD_CFLAGS) liburweb_la_LDFLAGS = $(AM_LDFLAGS) $(OPENSSL_LDFLAGS) \ - -export-symbols-regex '^(client_pruner|pthread_create_big|strcmp_nullsafe|uw_.*)' -liburweb_la_LIBADD = $(PTHREAD_LIBS) -lm $(OPENSSL_LIBS) + -export-symbols-regex '^(client_pruner|pthread_create_big|strcmp_nullsafe|uw_.*)' \ + -version-info 1:0:0 +liburweb_la_LIBADD = $(PTHREAD_LIBS) -lm $(OPENSSL_LIBS) $(ICU_LIBS) -licui18n -licuuc -licudata -licuio liburweb_http_la_LIBADD = liburweb.la -liburweb_http_la_LDFLAGS = -export-symbols-regex '^(main|uw_.*)' +liburweb_http_la_LDFLAGS = -export-symbols-regex '^(main|uw_.*)' \ + -version-info 1:0:0 liburweb_cgi_la_LIBADD = liburweb.la -liburweb_cgi_la_LDFLAGS = -export-symbols-regex '^(main|uw_.*)' +liburweb_cgi_la_LDFLAGS = -export-symbols-regex '^(main|uw_.*)' \ + -version-info 1:0:0 liburweb_fastcgi_la_LIBADD = liburweb.la -liburweb_fastcgi_la_LDFLAGS = -export-symbols-regex '^(main|uw_.*)' +liburweb_fastcgi_la_LDFLAGS = -export-symbols-regex '^(main|uw_.*)' \ + -version-info 1:0:0 liburweb_static_la_LIBADD = liburweb.la -liburweb_static_la_LDFLAGS = -export-symbols-regex '^(main|uw_.*)' +liburweb_static_la_LDFLAGS = -export-symbols-regex '^(main|uw_.*)' \ + -version-info 1:0:0 diff --git a/src/c/http.c b/src/c/http.c index 72685508..de2f1376 100644 --- a/src/c/http.c +++ b/src/c/http.c @@ -11,6 +11,7 @@ #include <unistd.h> #include <signal.h> #include <stdarg.h> +#include <sys/un.h> #include <pthread.h> @@ -65,6 +66,7 @@ static void log_error(void *data, const char *fmt, ...) { va_start(ap, fmt); vfprintf(stderr, fmt, ap); + fflush(stderr); } static void log_debug(void *data, const char *fmt, ...) { @@ -75,12 +77,13 @@ static void log_debug(void *data, const char *fmt, ...) { va_start(ap, fmt); vprintf(fmt, ap); + fflush(stdout); } } static uw_loggers ls = {NULL, log_error, log_debug}; -static unsigned max_buf_size = 1024 * 1024; // That's 1MB. +static unsigned max_buf_size = 10 * 1024 * 1024; // That's 10MB. static void *worker(void *data) { int me = *(int *)data; @@ -333,7 +336,7 @@ static void *worker(void *data) { } static void help(char *cmd) { - printf("Usage: %s [-p <port>] [-a <IPv4 address>] [-A <IPv6 address>] [-t <thread count>] [-m <bytes>] [-k] [-q] [-T SEC]\nThe '-k' option turns on HTTP keepalive.\nThe '-q' option turns off some chatter on stdout.\nThe '-T' option sets socket recv timeout (0 disables timeout, default is 5 sec).\nThe '-m' sets the maximum size (in bytes) for any buffer used to hold HTTP data sent by clients. (The default is 1 MB.)\n", cmd); + printf("Usage: %s [-p <port>] [-a <IPv4 address>] [-A <IPv6 address>] [-u <UNIX socket>] [-t <thread count>] [-m <bytes>] [-k] [-q] [-T SEC]\nThe '-k' option turns on HTTP keepalive.\nThe '-q' option turns off some chatter on stdout.\nThe '-T' option sets socket recv timeout (0 disables timeout, default is 5 sec).\nThe '-m' sets the maximum size (in bytes) for any buffer used to hold HTTP data sent by clients. (The default is 1 MB.)\n", cmd); } static void sigint(int signum) { @@ -346,6 +349,7 @@ union uw_sockaddr { struct sockaddr sa; struct sockaddr_in ipv4; struct sockaddr_in6 ipv6; + struct sockaddr_un un; }; int main(int argc, char *argv[]) { @@ -365,7 +369,7 @@ int main(int argc, char *argv[]) { my_addr.sa.sa_family = AF_INET; my_addr.ipv4.sin_addr.s_addr = INADDR_ANY; // auto-fill with my IP - while ((opt = getopt(argc, argv, "hp:a:A:t:kqT:m:")) != -1) { + while ((opt = getopt(argc, argv, "hp:a:A:u:t:kqT:m:")) != -1) { switch (opt) { case '?': fprintf(stderr, "Unknown command-line option\n"); @@ -403,6 +407,15 @@ int main(int argc, char *argv[]) { } break; + case 'u': + my_addr.sa.sa_family = AF_UNIX; + if (!strncpy(my_addr.un.sun_path, optarg, sizeof(my_addr.un.sun_path)-1)) { + fprintf(stderr, "Invalid UNIX socket filename\n"); + help(argv[0]); + return 1; + } + break; + case 't': nthreads = atoi(optarg); if (nthreads <= 0) { @@ -472,6 +485,11 @@ int main(int argc, char *argv[]) { my_size = sizeof(my_addr.ipv6); my_addr.ipv6.sin6_port = htons(uw_port); break; + + case AF_UNIX: + unlink(my_addr.un.sun_path); + my_size = sizeof(my_addr.un); + break; } if (bind(sockfd, &my_addr.sa, my_size) < 0) { diff --git a/src/c/memmem.c b/src/c/memmem.c index f31f4e31..efddd0c1 100644 --- a/src/c/memmem.c +++ b/src/c/memmem.c @@ -1,4 +1,6 @@ -#include "config.h" +#include "memmem.h" + +#ifndef HAVE_MEMMEM /* $NetBSD$ */ @@ -38,8 +40,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ -// Function renamed by Adam Chlipala in 2016. - #include <sys/cdefs.h> #if defined(LIBC_SCCS) && !defined(lint) __RCSID("$NetBSD$"); @@ -54,13 +54,8 @@ __RCSID("$NetBSD$"); #define NULL ((char *)0) #endif -/* - * urweb_memmem() returns the location of the first occurence of data - * pattern b2 of size len2 in memory block b1 of size len1 or - * NULL if none is found. - */ void * -urweb_memmem(const void *b1, size_t len1, const void *b2, size_t len2) +memmem(const void *b1, size_t len1, const void *b2, size_t len2) { /* Sanity check */ if(!(b1 != NULL && b2 != NULL && len1 != 0 && len2 != 0)) @@ -85,3 +80,5 @@ urweb_memmem(const void *b1, size_t len1, const void *b2, size_t len2) return NULL; } + +#endif // !defined(HAVE_MEMMEM) diff --git a/src/c/memmem.h b/src/c/memmem.h new file mode 100644 index 00000000..0ddbb494 --- /dev/null +++ b/src/c/memmem.h @@ -0,0 +1,23 @@ +#ifndef URWEB_MEMMEM_H +#define URWEB_MEMMEM_H + +#include "config.h" + +#ifdef HAVE_MEMMEM + +#include <string.h> + +#else // !defined(HAVE_MEMMEM) + +#include <stddef.h> + +/* + * memmem() returns the location of the first occurence of data + * pattern b2 of size len2 in memory block b1 of size len1 or + * NULL if none is found. + */ +void *memmem(const void *b1, size_t len1, const void *b2, size_t len2); + +#endif // !defined(HAVE_MEMMEM) + +#endif // URWEB_MEMMEM_H diff --git a/src/c/request.c b/src/c/request.c index 3e7ac34c..195b3cdc 100644 --- a/src/c/request.c +++ b/src/c/request.c @@ -11,13 +11,12 @@ #include <pthread.h> +#include "memmem.h" #include "urweb.h" #include "request.h" #define MAX_RETRIES 5 -void *urweb_memmem(const void *b1, size_t len1, const void *b2, size_t len2); - static int try_rollback(uw_context ctx, int will_retry, void *logger_data, uw_logger log_error) { int r = uw_rollback(ctx, will_retry); @@ -422,7 +421,7 @@ request_result uw_request(uw_request_context rc, uw_context ctx, } } - part = urweb_memmem(after_sub_headers, body + body_len - after_sub_headers, boundary, boundary_len); + part = memmem(after_sub_headers, body + body_len - after_sub_headers, boundary, boundary_len); if (!part) { log_error(logger_data, "Missing boundary after multipart payload\n"); return FAILED; diff --git a/src/c/urweb.c b/src/c/urweb.c index e7efae38..0db5fc80 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -20,6 +20,10 @@ #include <pthread.h> +#include <unicode/utf8.h> +#include <unicode/ustring.h> +#include <unicode/uchar.h> + #include "types.h" #include "uthash.h" @@ -737,7 +741,10 @@ void uw_close(uw_context ctx) { } uw_Basis_string uw_Basis_requestHeader(uw_context ctx, uw_Basis_string h) { - return ctx->get_header(ctx->get_header_data, h); + if (ctx->get_header) + return ctx->get_header(ctx->get_header_data, h); + else + return NULL; } void uw_set_headers(uw_context ctx, char *(*get_header)(void *, const char *), void *get_header_data) { @@ -896,9 +903,12 @@ char *uw_error_message(uw_context ctx) { return ctx->error_message; } -void uw_set_error_message(uw_context ctx, const char *msg) { - strncpy(ctx->error_message, msg, sizeof(ctx->error_message)); - ctx->error_message[sizeof(ctx->error_message)-1] = 0; +void uw_set_error_message(uw_context ctx, const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + + vsnprintf(ctx->error_message, ERROR_BUF_LEN, fmt, ap); + ctx->error_message[ERROR_BUF_LEN-1] = 0; } static input *INP(uw_context ctx) { @@ -1553,94 +1563,90 @@ const char *uw_Basis_get_settings(uw_context ctx, uw_unit u) { } } +uw_Basis_bool uw_Basis_isprint(uw_context ctx, uw_Basis_char ch); + +static void jsifyChar(char **buffer_ptr, uw_context ctx, uw_Basis_char c1) { + char* buffer = *buffer_ptr; + + switch (c1) { + case '"': + strcpy(buffer, "\\\""); + buffer += 2; + break; + case '\'': + strcpy(buffer, "\\047"); + buffer += 4; + break; + case '\\': + strcpy(buffer, "\\\\"); + buffer += 2; + break; + case '<': + strcpy(buffer, "\\074"); + buffer += 4; + break; + case '&': + strcpy(buffer, "\\046"); + buffer += 4; + break; + default: + if (uw_Basis_isprint(ctx, c1)) { + int offset = 0; + U8_APPEND_UNSAFE(buffer, offset, c1); + buffer += offset; + } else { + if(65536 > c1) { + sprintf(buffer, "\\u%04x", c1); + buffer += 6; + } else { + sprintf(buffer, "\\u{%06x}", c1); + buffer += 10; + } + } + } + + *buffer_ptr = buffer; +} + uw_Basis_string uw_Basis_jsifyString(uw_context ctx, uw_Basis_string s) { char *r, *s2; + uw_Basis_char c; - uw_check_heap(ctx, strlen(s) * 4 + 3); + uw_check_heap(ctx, strlen(s) * 10 + 3); r = s2 = ctx->heap.front; *s2++ = '"'; - for (; *s; s++) { - unsigned char c = *s; - - switch (c) { - case '"': - strcpy(s2, "\\\""); - s2 += 2; - break; - case '\'': - strcpy(s2, "\\047"); - s2 += 4; - break; - case '\\': - strcpy(s2, "\\\\"); - s2 += 2; - break; - case '<': - strcpy(s2, "\\074"); - s2 += 4; - break; - case '&': - strcpy(s2, "\\046"); - s2 += 4; - break; - default: - if (isprint((int)c) || c >= 128) - *s2++ = c; - else { - sprintf(s2, "\\%03o", c); - s2 += 4; - } + int offset = 0; + while(s[offset] != 0) + { + U8_NEXT(s, offset, -1, c); + + jsifyChar(&s2, ctx, c); } - } strcpy(s2, "\""); ctx->heap.front = s2 + 2; + return r; } +uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c); + uw_Basis_string uw_Basis_jsifyChar(uw_context ctx, uw_Basis_char c1) { - unsigned char c = c1; char *r, *s2; - uw_check_heap(ctx, 7); + uw_check_heap(ctx, 10); r = s2 = ctx->heap.front; + *s2++ = '"'; - - switch (c) { - case '"': - strcpy(s2, "\\\""); - s2 += 2; - break; - case '\'': - strcpy(s2, "\\047"); - s2 += 4; - break; - case '\\': - strcpy(s2, "\\\\"); - s2 += 2; - break; - case '<': - strcpy(s2, "\\074"); - s2 += 4; - break; - case '&': - strcpy(s2, "\\046"); - s2 += 4; - break; - default: - if (isprint((int)c) || c >= 128) - *s2++ = c; - else { - sprintf(s2, "\\%03o", (unsigned char)c); - s2 += 4; - } - } + + jsifyChar(&s2, ctx, c1); strcpy(s2, "\""); ctx->heap.front = s2 + 2; + return r; } @@ -1684,6 +1690,7 @@ uw_Basis_string uw_Basis_jsifyString_ws(uw_context ctx, uw_Basis_string s) { strcpy(s2, "\""); ctx->script.front = s2 + 1; + return r; } @@ -1951,29 +1958,61 @@ char *uw_Basis_urlifyFloat(uw_context ctx, uw_Basis_float n) { return r; } +static void aux_urlifyChar(char** ptr, uw_Basis_char c) { + char* p = *ptr; + + if((uint32_t)(c) <= 0x7f) { + sprintf(p, ".%02X", (uint8_t)(c)); + p += 3; + } else { + if((uint32_t)(c) <= 0x7ff) { + sprintf(p, ".%02X", (uint8_t)(((c)>>6)|0xc0)); + p += 3; + } else { + if((uint32_t)(c) <= 0xffff) { + sprintf(p, ".%02X", (uint8_t)(((c)>>12)|0xe0)); + p += 3; + } else { + sprintf(p, ".%02X", (uint8_t)(((c)>>18)|0xf0)); + p += 3; + sprintf(p, ".%02X", (uint8_t)((((c)>>12)&0x3f)|0x80)); + p += 3; + } + sprintf(p, ".%02X", (uint8_t)((((c)>>6)&0x3f)|0x80)); + p += 3; + } + sprintf(p, ".%02X", (uint8_t)(((c)&0x3f)|0x80)); + p += 3; + } + + *ptr = p; +} + char *uw_Basis_urlifyString(uw_context ctx, uw_Basis_string s) { char *r, *p; if (s[0] == '\0') return "_"; - uw_check_heap(ctx, strlen(s) * 3 + 1 + !!(s[0] == '_')); + uw_check_heap(ctx, strlen(s) * 12 + 1 + !!(s[0] == '_')); r = p = ctx->heap.front; if (s[0] == '_') *p++ = '_'; - for (; *s; s++) { - unsigned char c = *s; - - if (c == ' ') + uw_Basis_char c; + int offset = 0, curr = 0; + while (s[offset] != 0) { + U8_NEXT(s, offset, -1, c); + + if (U8_IS_SINGLE(s[curr]) && s[curr] == ' ') *p++ = '+'; - else if (isalnum(c)) - *p++ = c; + else if (U8_IS_SINGLE(s[curr]) && isalnum(s[curr])) + *p++ = s[curr]; else { - sprintf(p, ".%02X", c); - p += 3; + aux_urlifyChar(&p, c); } + curr = offset; } *p++ = 0; @@ -1983,7 +2022,7 @@ char *uw_Basis_urlifyString(uw_context ctx, uw_Basis_string s) { char *uw_Basis_urlifyBool(uw_context ctx, uw_Basis_bool b) { (void)ctx; - if (b == uw_Basis_False) + if (!b) return "0"; else return "1"; @@ -2043,6 +2082,29 @@ uw_unit uw_Basis_urlifyTime_w(uw_context ctx, uw_Basis_time t) { return uw_Basis_urlifyInt_w(ctx, (uw_Basis_int)t.seconds * 1000000 + t.microseconds); } +uw_unit uw_Basis_urlifyChar_w(uw_context ctx, uw_Basis_char c) { + if (c == '\0') { + uw_check(ctx, 1); + uw_writec_unsafe(ctx, '_'); + return uw_unit_v; + } + + uw_check(ctx, 12 + !!(c == '_')); + + if (c == '_') + uw_writec_unsafe(ctx, '_'); + + if (c == ' ') + uw_writec_unsafe(ctx, '+'); + else if (isalnum(c) && c <= 0x7f) + uw_writec_unsafe(ctx, c); + else { + aux_urlifyChar(&(ctx->page.front), c); + } + + return uw_unit_v; +} + uw_unit uw_Basis_urlifyString_w(uw_context ctx, uw_Basis_string s) { if (s[0] == '\0') { uw_check(ctx, 1); @@ -2050,29 +2112,31 @@ uw_unit uw_Basis_urlifyString_w(uw_context ctx, uw_Basis_string s) { return uw_unit_v; } - uw_check(ctx, strlen(s) * 3 + !!(s[0] == '_')); + uw_check(ctx, strlen(s) * 12 + !!(s[0] == '_')); if (s[0] == '_') uw_writec_unsafe(ctx, '_'); - for (; *s; s++) { - unsigned char c = *s; - - if (c == ' ') + uw_Basis_char c; + int offset = 0, curr = 0; + while (s[offset] != 0) { + U8_NEXT(s, offset, -1, c); + + if (U8_IS_SINGLE(s[curr]) && s[curr] == ' ') uw_writec_unsafe(ctx, '+'); - else if (isalnum(c)) - uw_writec_unsafe(ctx, c); - else { - sprintf(ctx->page.front, ".%02X", c); - ctx->page.front += 3; + else if (U8_IS_SINGLE(s[curr]) && isalnum(s[curr])) + uw_writec_unsafe(ctx, s[curr]); + else { + aux_urlifyChar(&(ctx->page.front), c); } + curr = offset; } return uw_unit_v; } uw_unit uw_Basis_urlifyBool_w(uw_context ctx, uw_Basis_bool b) { - if (b == uw_Basis_False) + if (!b) uw_writec(ctx, '0'); else uw_writec(ctx, '1'); @@ -2207,6 +2271,23 @@ uw_Basis_string uw_Basis_unurlifyString(uw_context ctx, char **s) { return r; } +uw_Basis_char uw_Basis_unurlifyChar(uw_context ctx, char **s) { + char *new_s = uw_unurlify_advance(*s); + char *r; + int len; + + len = strlen(*s); + uw_check_heap(ctx, len + 1); + + r = ctx->heap.front; + ctx->heap.front = uw_unurlifyString_to(0, ctx, ctx->heap.front, *s); + *s = new_s; + if (strlen(r) == 1) + return r[0]; + else + uw_error(ctx, FATAL, "Unurlified character is multiple characters long"); +} + uw_Basis_unit uw_Basis_unurlifyUnit(uw_context ctx, char **s) { (void)ctx; *s = uw_unurlify_advance(*s); @@ -2249,25 +2330,40 @@ uw_unit uw_Basis_htmlifyInt_w(uw_context ctx, uw_Basis_int n) { return uw_unit_v; } -char *uw_Basis_htmlifySpecialChar(uw_context ctx, unsigned char ch) { +char *uw_Basis_htmlifySpecialChar(uw_context ctx, uw_Basis_char ch) { unsigned int n = ch; int len; char *r; - uw_check_heap(ctx, INTS_MAX+3); + uw_check_heap(ctx, INTS_MAX+3 + 1); r = ctx->heap.front; - sprintf(r, "&#%u;%n", n, &len); + len = sprintf(r, "&#%u;", n); ctx->heap.front += len+1; + return r; } -uw_unit uw_Basis_htmlifySpecialChar_w(uw_context ctx, unsigned char ch) { +uw_unit uw_Basis_htmlifySpecialChar_w(uw_context ctx, uw_Basis_char ch) { unsigned int n = ch; - int len; + int len = 0; uw_check(ctx, INTS_MAX+3); - sprintf(ctx->page.front, "&#%u;%n", n, &len); + + if(uw_Basis_isprint(ctx, ch)) { + + int32_t len_written = 0; + UErrorCode err = U_ZERO_ERROR; + + u_strToUTF8(ctx->page.front, 5, &len_written, (const UChar*)&ch, 1, &err); + len = len_written; + } + + // either it's a non-printable character, or we failed to convert to UTF-8 + if(len == 0) { + len = sprintf(ctx->page.front, "&#%u;", n); + } ctx->page.front += len; + return uw_unit_v; } @@ -2315,23 +2411,35 @@ uw_unit uw_Basis_jsifyInt_w(uw_context ctx, uw_Basis_int n) { char *uw_Basis_htmlifyString(uw_context ctx, const char *s) { char *r, *s2; + uw_Basis_char c1; + int oldoffset = 0, offset = 0, offset2 = 0, len = 0; + + uw_check_heap(ctx, strlen(s) * (INTS_MAX + 3) + 1); - uw_check_heap(ctx, strlen(s) * 5 + 1); - - for (r = s2 = ctx->heap.front; *s; s++) { - unsigned char c = *s; - - switch (c) { - case '<': - strcpy(s2, "<"); - s2 += 4; - break; - case '&': - strcpy(s2, "&"); - s2 += 5; - break; - default: - *s2++ = c; + r = s2 = ctx->heap.front; + + while (s[offset] != 0) { + oldoffset = offset; + U8_NEXT(s, offset, -1, c1); + + if ((offset - oldoffset == 1) && uw_Basis_isprint(ctx, c1)) { + switch (c1) { + case '<': + strcpy(s2, "<"); + s2 += 4; + break; + case '&': + strcpy(s2, "&"); + s2 += 5; + break; + default: + offset2 = 0; + U8_APPEND_UNSAFE(s2, offset2, c1); + s2 += offset2; + } + } else { + len = sprintf(s2, "&#%u;", c1); + s2 += len; } } @@ -2342,20 +2450,29 @@ char *uw_Basis_htmlifyString(uw_context ctx, const char *s) { uw_unit uw_Basis_htmlifyString_w(uw_context ctx, uw_Basis_string s) { uw_check(ctx, strlen(s) * 6); - - for (; *s; s++) { - unsigned char c = *s; - - switch (c) { - case '<': - uw_write_unsafe(ctx, "<"); - break; - case '&': - uw_write_unsafe(ctx, "&"); - break; - default: - uw_writec_unsafe(ctx, c); + int offset = 0, oldoffset = 0; + uw_Basis_char c1; + + while(s[offset] != 0){ + oldoffset = offset; + U8_NEXT(s, offset, -1, c1); + + if ((offset - oldoffset == 1) && uw_Basis_isprint(ctx, c1)) { + + switch (c1) { + case '<': + uw_write_unsafe(ctx, "<"); + break; + case '&': + uw_write_unsafe(ctx, "&"); + break; + default: + uw_writec_unsafe(ctx, c1); + } } + else { + uw_Basis_htmlifySpecialChar_w(ctx, c1); + } } return uw_unit_v; @@ -2363,14 +2480,14 @@ uw_unit uw_Basis_htmlifyString_w(uw_context ctx, uw_Basis_string s) { uw_Basis_string uw_Basis_htmlifyBool(uw_context ctx, uw_Basis_bool b) { (void)ctx; - if (b == uw_Basis_False) + if (!b) return "False"; else return "True"; } uw_unit uw_Basis_htmlifyBool_w(uw_context ctx, uw_Basis_bool b) { - if (b == uw_Basis_False) { + if (!b) { uw_check(ctx, 6); strcpy(ctx->page.front, "False"); ctx->page.front += 5; @@ -2419,27 +2536,33 @@ uw_unit uw_Basis_htmlifySource_w(uw_context ctx, uw_Basis_source src) { } uw_Basis_char uw_Basis_strsub(uw_context ctx, uw_Basis_string s, uw_Basis_int n) { + uw_Basis_char c; + int offset = 0; + while (n >= 0) { - if (*s == 0) + + if (s[offset] == 0) uw_error(ctx, FATAL, "Out-of-bounds strsub"); + U8_NEXT(s, offset, -1, c); + if (n == 0) - return *s; + return c; --n; - ++s; } uw_error(ctx, FATAL, "Negative strsub bound"); } uw_Basis_string uw_Basis_strsuffix(uw_context ctx, uw_Basis_string s, uw_Basis_int n) { + int offset = 0; while (n >= 0) { - if (*s == 0 || n == 0) - return s; + if (s[offset] == 0 || n == 0) + return s + offset; + U8_FWD_1(s, offset, -1); --n; - ++s; } uw_error(ctx, FATAL, "Negative strsuffix bound"); @@ -2447,40 +2570,81 @@ uw_Basis_string uw_Basis_strsuffix(uw_context ctx, uw_Basis_string s, uw_Basis_i uw_Basis_int uw_Basis_strlen(uw_context ctx, uw_Basis_string s) { (void)ctx; - return strlen(s); + int offset = 0, iterations = 0; + while (s[offset] != 0) { + U8_FWD_1(s, offset, -1); + ++iterations; + } + return iterations; } uw_Basis_bool uw_Basis_strlenGe(uw_context ctx, uw_Basis_string s, uw_Basis_int n) { (void)ctx; - + int offset = 0; while (n > 0) { - if (*s == 0) + if (s[offset] == 0) return uw_Basis_False; - + + U8_FWD_1(s, offset, -1); --n; - ++s; } return uw_Basis_True; } +static int aux_strchr(uw_Basis_string s, uw_Basis_char ch, int *o_offset) { + int u8idx = 0, offset = 0, offsetpr = 0; + uw_Basis_char c; + + while (s[offset] != 0) { + U8_NEXT(s, offset, -1, c); + if (c == ch) { + *o_offset = offsetpr; + return u8idx; + } + + offsetpr = offset; + ++u8idx; + } + + *o_offset = -1; + return -1; +} + uw_Basis_string uw_Basis_strchr(uw_context ctx, uw_Basis_string s, uw_Basis_char ch) { (void)ctx; - return strchr(s, ch); + int offset = -1; + if (aux_strchr(s, ch, &offset) > -1) { + return s + offset; + } + return NULL; } uw_Basis_int uw_Basis_strcspn(uw_context ctx, uw_Basis_string s, uw_Basis_string chs) { (void)ctx; - return strcspn(s, chs); + int offset = 0, u8idx = 0, offsetChs = 0; + uw_Basis_char c; + + while (s[offset] != 0) { + U8_NEXT(s, offset, -1, c); + if (aux_strchr(chs, c, &offsetChs) > -1) { + return u8idx; + } + ++u8idx; + } + + return u8idx; } uw_Basis_int *uw_Basis_strindex(uw_context ctx, uw_Basis_string s, uw_Basis_char ch) { - uw_Basis_string r = strchr(s, ch); - if (r == NULL) + (void)ctx; + int offset = -1; + int r = aux_strchr(s, ch, &offset); + if (r == -1) return NULL; else { uw_Basis_int *nr = uw_malloc(ctx, sizeof(uw_Basis_int)); - *nr = r - s; + *nr = r; return nr; } } @@ -2491,13 +2655,19 @@ uw_Basis_int *uw_Basis_strsindex(uw_context ctx, const char *haystack, const cha return NULL; else { uw_Basis_int *nr = uw_malloc(ctx, sizeof(uw_Basis_int)); - *nr = r - haystack; + int src = r - haystack, offset = 0, utf8idx = 0; + while (offset < src) { + U8_FWD_1(haystack, offset, -1); + ++utf8idx; + } + + *nr = utf8idx; return nr; } } uw_Basis_string uw_Basis_strcat(uw_context ctx, uw_Basis_string s1, uw_Basis_string s2) { - int len = uw_Basis_strlen(ctx, s1) + uw_Basis_strlen(ctx, s2) + 1; + int len = strlen(s1) + strlen(s2) + 1; char *s; uw_check_heap(ctx, len); @@ -2512,8 +2682,8 @@ uw_Basis_string uw_Basis_strcat(uw_context ctx, uw_Basis_string s1, uw_Basis_str } uw_Basis_string uw_Basis_substring(uw_context ctx, uw_Basis_string s, uw_Basis_int start, uw_Basis_int len) { - size_t full_len = uw_Basis_strlen(ctx, s); - + int full_len = uw_Basis_strlen(ctx, s); + if (start < 0) uw_error(ctx, FATAL, "substring: Negative start index"); if (len < 0) @@ -2521,32 +2691,53 @@ uw_Basis_string uw_Basis_substring(uw_context ctx, uw_Basis_string s, uw_Basis_i if (start + len > full_len) uw_error(ctx, FATAL, "substring: Start index plus length is too large"); - if (start + len == full_len) - return &s[start]; - else { - uw_Basis_string r = uw_malloc(ctx, len+1); - memcpy(r, s+start, len); - r[len] = 0; + int offset = 0; + U8_FWD_N(s, offset, -1, start); + + if (start + len == full_len) { + return s + offset; + } else { + int end = offset; + U8_FWD_N(s, end, -1, len); + + int actual_len = end - offset; + + uw_Basis_string r = uw_malloc(ctx, actual_len + 1); + memcpy(r, s + offset, actual_len); + r[actual_len] = 0; return r; } - } uw_Basis_string uw_Basis_str1(uw_context ctx, uw_Basis_char ch) { char *r; - - uw_check_heap(ctx, 2); + int req = U8_LENGTH(ch); + int offset = 0; + + uw_check_heap(ctx, req + 1); r = ctx->heap.front; - r[0] = ch; - r[1] = 0; - ctx->heap.front += 2; + U8_APPEND_UNSAFE(r, offset, ch); + r[req] = 0; - return r; + ctx->heap.front += req + 1; + return r; +} + +uw_Basis_string uw_Basis_ofUnicode(uw_context ctx, uw_Basis_int n) { + UChar buf16[] = {n}; + uw_Basis_string out = uw_malloc(ctx, 3); + int32_t outLen; + UErrorCode pErrorCode = 0; + + if (u_strToUTF8(out, 3, &outLen, buf16, 1, &pErrorCode) == NULL || outLen == 0) + uw_error(ctx, FATAL, "Bad Unicode string to unescape (error %s)", u_errorName(pErrorCode)); + + return out; } uw_Basis_string uw_strdup(uw_context ctx, uw_Basis_string s1) { - int len = uw_Basis_strlen(ctx, s1) + 1; + int len = strlen(s1) + 1; char *s; uw_check_heap(ctx, len); @@ -2673,7 +2864,6 @@ uw_Basis_string uw_Basis_sqlifyString(uw_context ctx, uw_Basis_string s) { uw_Basis_string uw_Basis_sqlifyChar(uw_context ctx, uw_Basis_char c) { char *r, *s2; - uw_check_heap(ctx, 5 + uw_Estrings + strlen(uw_sqlsuffixChar)); r = s2 = ctx->heap.front; @@ -2818,7 +3008,7 @@ uw_Basis_string uw_Basis_sqlifyStringN(uw_context ctx, uw_Basis_string s) { char *uw_Basis_sqlifyBool(uw_context ctx, uw_Basis_bool b) { (void)ctx; - if (b == uw_Basis_False) + if (!b) return "FALSE"; else return "TRUE"; @@ -2902,7 +3092,7 @@ char *uw_Basis_ensqlBool(uw_Basis_bool b) { static uw_Basis_int true = 1; static uw_Basis_int false = 0; - if (b == uw_Basis_False) + if (!b) return (char *)&false; else return (char *)&true; @@ -2931,15 +3121,12 @@ uw_Basis_string uw_Basis_floatToString(uw_context ctx, uw_Basis_float n) { } uw_Basis_string uw_Basis_charToString(uw_context ctx, uw_Basis_char ch) { - char *r = uw_malloc(ctx, 2); - r[0] = ch; - r[1] = 0; - return r; + return uw_Basis_str1(ctx, ch); } uw_Basis_string uw_Basis_boolToString(uw_context ctx, uw_Basis_bool b) { (void)ctx; - if (b == uw_Basis_False) + if (!b) return "False"; else return "True"; @@ -2994,11 +3181,12 @@ uw_Basis_char *uw_Basis_stringToChar(uw_context ctx, uw_Basis_string s) { uw_Basis_char *r = uw_malloc(ctx, 1); r[0] = 0; return r; - } else if (s[1] != 0) + } else if (uw_Basis_strlenGe(ctx, s, 2)) return NULL; else { uw_Basis_char *r = uw_malloc(ctx, 1); - r[0] = s[0]; + int offset = 0; + U8_NEXT(s, offset, -1, *r); return r; } } @@ -3123,10 +3311,14 @@ uw_Basis_float uw_Basis_stringToFloat_error(uw_context ctx, uw_Basis_string s) { uw_Basis_char uw_Basis_stringToChar_error(uw_context ctx, uw_Basis_string s) { if (s[0] == 0) return 0; - else if (s[1] != 0) + else if (uw_Basis_strlenGe(ctx, s, 2)) uw_error(ctx, FATAL, "Can't parse char: %s", uw_Basis_htmlifyString(ctx, s)); - else - return s[0]; + else { + uw_Basis_char c; + int offset = 0; + U8_NEXT(s, offset, -1, c); + return c; + } } uw_Basis_bool uw_Basis_stringToBool_error(uw_context ctx, uw_Basis_string s) { @@ -3229,10 +3421,19 @@ uw_Basis_blob uw_Basis_stringToBlob_error(uw_context ctx, uw_Basis_string s, siz s += 2; while (*s) { + char a = s[0]; + s += 1; + char b; + if (*s){ + b = s[0]; + } else { + b = 0; + } int n; - sscanf(s, "%02x", &n); + char buf[3] = {a, b, 0}; + n = strtol(buf, NULL, 16); *r++ = n; - s += 2; + s += 1; } } else { while (*s) { @@ -3986,6 +4187,20 @@ uw_Basis_blob uw_Basis_textBlob(uw_context ctx, uw_Basis_string s) { return b; } +uw_Basis_string uw_Basis_textOfBlob(uw_context ctx, uw_Basis_blob b) { + size_t i; + uw_Basis_string r; + + for (i = 0; i < b.size; ++i) + if (b.data[i] == 0) + return NULL; + + r = uw_malloc(ctx, b.size + 1); + memcpy(r, b.data, b.size); + r[b.size] = 0; + return r; +} + uw_Basis_blob uw_Basis_fileData(uw_context ctx, uw_Basis_file f) { (void)ctx; return f.data; @@ -4235,7 +4450,7 @@ uw_Basis_time uw_Basis_fromDatetime(uw_context ctx, uw_Basis_int year, uw_Basis_ struct tm tm = { .tm_year = year - 1900, .tm_mon = month, .tm_mday = day, .tm_hour = hour, .tm_min = minute, .tm_sec = second, .tm_isdst = -1 }; - uw_Basis_time r = { timelocal(&tm) }; + uw_Basis_time r = { mktime(&tm) }; return r; } @@ -4325,88 +4540,108 @@ void uw_set_global(uw_context ctx, char *name, void *data, void (*free)(void*)) uw_Basis_bool uw_Basis_isalnum(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isalnum((int)c); + return !!u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM); } uw_Basis_bool uw_Basis_isalpha(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isalpha((int)c); + return !!u_hasBinaryProperty(c, UCHAR_ALPHABETIC); } uw_Basis_bool uw_Basis_isblank(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isblank((int)c); + return !!u_hasBinaryProperty(c, UCHAR_POSIX_BLANK); } uw_Basis_bool uw_Basis_iscntrl(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!iscntrl((int)c); + return !!(u_charType(c)==U_CONTROL_CHAR); } uw_Basis_bool uw_Basis_isdigit(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isdigit((int)c); + return !!u_isdigit(c); } uw_Basis_bool uw_Basis_isgraph(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isgraph((int)c); + return !!u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH); } uw_Basis_bool uw_Basis_islower(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!islower((int)c); + return !!u_hasBinaryProperty(c, UCHAR_LOWERCASE); } uw_Basis_bool uw_Basis_isprint(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isprint((int)c); + return !!u_hasBinaryProperty(c, UCHAR_POSIX_PRINT); } uw_Basis_bool uw_Basis_ispunct(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!ispunct((int)c); + return !!u_ispunct(c); } uw_Basis_bool uw_Basis_isspace(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isspace((int)c); + return !!u_hasBinaryProperty(c, UCHAR_WHITE_SPACE); } uw_Basis_bool uw_Basis_isupper(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isupper((int)c); + return !!u_hasBinaryProperty(c, UCHAR_UPPERCASE); } uw_Basis_bool uw_Basis_isxdigit(uw_context ctx, uw_Basis_char c) { (void)ctx; - return !!isxdigit((int)c); + return !!(c <= 0x7f && u_isxdigit(c)); } uw_Basis_char uw_Basis_tolower(uw_context ctx, uw_Basis_char c) { (void)ctx; - return tolower((int)c); + return u_tolower(c); } uw_Basis_char uw_Basis_toupper(uw_context ctx, uw_Basis_char c) { (void)ctx; - return toupper((int)c); + return u_toupper(c); } uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c) { (void)ctx; - return (unsigned char)c; + return (uw_Basis_int)c; +} + +uw_Basis_bool uw_Basis_iscodepoint(uw_context ctx, uw_Basis_int n) { + (void)ctx; + return !!(n <= 0x10FFFF); +} + +uw_Basis_bool uw_Basis_issingle(uw_context ctx, uw_Basis_char c) { + (void)ctx; + return !!(c < 128); } uw_Basis_char uw_Basis_chr(uw_context ctx, uw_Basis_int n) { (void)ctx; - return n; + uw_Basis_char ch = (uw_Basis_char)n; + + if (n > 0x10FFFF) { + uw_error(ctx, FATAL, "The integer %lld is not a valid char codepoint", n); + } + + return ch; } uw_Basis_string uw_Basis_currentUrl(uw_context ctx) { return ctx->current_url; } +uw_Basis_string uw_Basis_anchorUrl(uw_context ctx, uw_Basis_string s) { + return uw_Basis_strcat(ctx, uw_Basis_strcat(ctx, ctx->current_url, "#"), s); +} + void uw_set_currentUrl(uw_context ctx, char *s) { ctx->current_url = s; } @@ -4654,7 +4889,7 @@ uw_Basis_string uw_Basis_atom(uw_context ctx, uw_Basis_string s) { for (p = s; *p; ++p) { char c = *p; - if (!isalnum((int)c) && c != '+' && c != '-' && c != '.' && c != '%' && c != '#') + if (!U8_IS_SINGLE(c) || (!isalnum((int)c) && c != '+' && c != '-' && c != '.' && c != '%' && c != '#')) uw_error(ctx, FATAL, "Disallowed character in CSS atom"); } @@ -4666,8 +4901,8 @@ uw_Basis_string uw_Basis_css_url(uw_context ctx, uw_Basis_string s) { for (p = s; *p; ++p) { char c = *p; - if (!isalnum((int)c) && c != ':' && c != '/' && c != '.' && c != '_' && c != '+' - && c != '-' && c != '%' && c != '?' && c != '&' && c != '=' && c != '#') + if (!U8_IS_SINGLE(c) || (!isalnum((int)c) && c != ':' && c != '/' && c != '.' && c != '_' && c != '+' + && c != '-' && c != '%' && c != '?' && c != '&' && c != '=' && c != '#')) uw_error(ctx, FATAL, "Disallowed character in CSS URL"); } @@ -4680,12 +4915,12 @@ uw_Basis_string uw_Basis_property(uw_context ctx, uw_Basis_string s) { if (!*s) uw_error(ctx, FATAL, "Empty CSS property"); - if (!islower((int)s[0]) && s[0] != '_') + if (!U8_IS_SINGLE(s[0]) || (!islower((int)s[0]) && s[0] != '_')) uw_error(ctx, FATAL, "Bad initial character in CSS property"); for (p = s; *p; ++p) { char c = *p; - if (!islower((int)c) && !isdigit((int)c) && c != '_' && c != '-') + if (!U8_IS_SINGLE(c) || (!islower((int)c) && !isdigit((int)c) && c != '_' && c != '-')) uw_error(ctx, FATAL, "Disallowed character in CSS property"); } @@ -4719,13 +4954,13 @@ uw_Basis_postField *uw_Basis_firstFormField(uw_context ctx, uw_Basis_string s) { f = uw_malloc(ctx, sizeof(uw_Basis_postField)); unurl = s; - f->name = uw_Basis_unurlifyString(ctx, &unurl); + f->name = uw_Basis_unurlifyString_fromClient(ctx, &unurl); s = strchr(s, 0); if (!s) uw_error(ctx, FATAL, "firstFormField: Missing null terminator"); ++s; unurl = s; - f->value = uw_Basis_unurlifyString(ctx, &unurl); + f->value = uw_Basis_unurlifyString_fromClient(ctx, &unurl); s = strchr(s, 0); if (!s) uw_error(ctx, FATAL, "firstFormField: Missing null terminator"); @@ -4738,7 +4973,7 @@ uw_Basis_string uw_Basis_blessData(uw_context ctx, uw_Basis_string s) { char *p = s; for (; *p; ++p) - if (!isalnum(*p) && *p != '-' && *p != '_') + if (!U8_IS_SINGLE(*p) || (!isalnum(*p) && *p != '-' && *p != '_')) uw_error(ctx, FATAL, "Illegal HTML5 data-* attribute: %s", s); return s; @@ -5070,7 +5305,7 @@ int strcmp_nullsafe(const char *str1, const char *str2) { static int is_valid_hash(uw_Basis_string hash) { for (; *hash; ++hash) - if (!isxdigit(*hash)) + if (!U8_IS_SINGLE(*hash) || !isxdigit(*hash)) return 0; return 1; @@ -5102,7 +5337,7 @@ uw_unit uw_Basis_cache_file(uw_context ctx, uw_Basis_blob contents) { fd = mkstemp(tempfile); if (fd < 0) - uw_error(ctx, FATAL, "Error creating temporary file for cache"); + uw_error(ctx, FATAL, "Error creating temporary file %s for cache", tempfile); while (written_so_far < contents.size) { ssize_t written_just_now = write(fd, contents.data + written_so_far, contents.size - written_so_far); @@ -5134,9 +5369,8 @@ uw_Basis_blob uw_Basis_check_filecache(uw_context ctx, uw_Basis_string hash) { // Hashes come formatted for printing by Postgres, which means they start with // two extra characters. Let's remove them. - if (!hash[0] || !hash[1]) - uw_error(ctx, FATAL, "Hash to check against file cache came in not in Postgres format: %s", hash); - hash += 2; + if (hash[0] == '\\' && hash[1] == 'x') + hash += 2; if (!dir) uw_error(ctx, FATAL, "Checking file cache when no directory is set"); |