From 5e2ebc973f19fe8e5fdbe20e102e445329b528b0 Mon Sep 17 00:00:00 2001
From: Oisín Mac Fhearaí <denpashogai@gmail.com>
Date: Wed, 28 Aug 2019 01:56:53 +0100
Subject: Minor cleanup -- handle the case where we couldn't successfully
 convert the character to UTF-8 by outputting an HTML escape (the previous
 default behaviour for all multi-byte characters).

---
 src/c/urweb.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/c/urweb.c b/src/c/urweb.c
index a76f0004..62561828 100644
--- a/src/c/urweb.c
+++ b/src/c/urweb.c
@@ -20,6 +20,7 @@
 
 #include <pthread.h>
 
+#include <unicode/utf8.h>
 #include <unicode/ustring.h>
 #include <unicode/uchar.h>
 
@@ -2344,7 +2345,7 @@ char *uw_Basis_htmlifySpecialChar(uw_context ctx, uw_Basis_char ch) {
 
 uw_unit uw_Basis_htmlifySpecialChar_w(uw_context ctx, uw_Basis_char ch) {
   unsigned int n = ch;
-  int len;
+  int len = 0;
 
   uw_check(ctx, INTS_MAX+3);
 
@@ -2359,7 +2360,10 @@ uw_unit uw_Basis_htmlifySpecialChar_w(uw_context ctx, uw_Basis_char ch) {
     sprintf(ctx->page.front, "%s", buf);
     // printf("buf: %s, hex: %x, len_written: %d, err: %s\n", buf, ch, len_written, u_errorName(err));
     len = len_written;
-  } else {
+  }
+
+  // either it's a non-printable character, or we failed to convert to UTF-8
+  if(len == 0) {
     len = sprintf(ctx->page.front, "&#%u;", n);
   }
   ctx->page.front += len;
-- 
cgit v1.2.3