diff options
author | Adam Chlipala <adam@chlipala.net> | 2019-09-06 12:53:36 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-09-06 12:53:36 -0400 |
commit | b506e44ebbf80d98bb1a39d5566e7cdf53b3fc78 (patch) | |
tree | 951447b0e14167dab1321a71a480ea7cdef90bd7 /src/c/urweb.c | |
parent | f54687e989372470c2848890e9499feb9fd70352 (diff) | |
parent | 0490176b675eb3ea36cd51fa5d1fd41a3126c10c (diff) |
Merge pull request #177 from DestyNova/use-utf8-instead-of-escapes
Try to use UTF-8 rather than numeric HTML escapes
Diffstat (limited to 'src/c/urweb.c')
-rw-r--r-- | src/c/urweb.c | 20 |
1 files changed, 17 insertions, 3 deletions
```diff
diff --git a/src/c/urweb.c b/src/c/urweb.c
index b820354f..af929269 100644
--- a/src/c/urweb.c
+++ b/src/c/urweb.c
@@ -21,6 +21,7 @@
 #include <pthread.h>
 
 #include <unicode/utf8.h>
+#include <unicode/ustring.h>
 #include <unicode/uchar.h>
 
 #include "types.h"
@@ -2344,10 +2345,23 @@ char *uw_Basis_htmlifySpecialChar(uw_context ctx, uw_Basis_char ch) {
 
 uw_unit uw_Basis_htmlifySpecialChar_w(uw_context ctx, uw_Basis_char ch) {
   unsigned int n = ch;
-  int len;
+  int len = 0;
 
   uw_check(ctx, INTS_MAX+3);
-  len = sprintf(ctx->page.front, "&#%u;", n);
+
+  if(uw_Basis_isprint(ctx, ch)) {
+
+    int32_t len_written = 0;
+    UErrorCode err = U_ZERO_ERROR;
+
+    u_strToUTF8(ctx->page.front, 5, &len_written, (const UChar*)&ch, 1, &err);
+    len = len_written;
+  }
+
+  // either it's a non-printable character, or we failed to convert to UTF-8
+  if(len == 0) {
+    len = sprintf(ctx->page.front, "&#%u;", n);
+  }
   ctx->page.front += len;
 
   return uw_unit_v;
@@ -2459,7 +2473,7 @@ uw_unit uw_Basis_htmlifyString_w(uw_context ctx, uw_Basis_string s) {
     else {
       uw_Basis_htmlifySpecialChar_w(ctx, c1);
     }
-  }
+  }
 
   return uw_unit_v;
 }
```