From a155ab6debe1f271f809cce2ff4a20b2626e7d69 Mon Sep 17 00:00:00 2001 From: Jebbs Date: Fri, 20 Dec 2019 10:31:52 +0800 Subject: Filter valid XML characters for UTF-8 XML documents before decoding This change should reduce "illegal character code" XML errors. --- reader/xml/decoder_test.go | 61 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 3 deletions(-) (limited to 'reader/xml/decoder_test.go') diff --git a/reader/xml/decoder_test.go b/reader/xml/decoder_test.go index ea24bf8..1208ef8 100644 --- a/reader/xml/decoder_test.go +++ b/reader/xml/decoder_test.go @@ -11,19 +11,74 @@ import ( "testing" ) -func TestIllegalCharacters(t *testing.T) { +func TestUTF8WithIllegalCharacters(t *testing.T) { type myxml struct { XMLName xml.Name `xml:"rss"` Version string `xml:"version,attr"` Title string `xml:"title"` } - data := fmt.Sprintf(`%s`, "\x10") + expected := "Title & 中文标题" + data := fmt.Sprintf(`Title & 中文%s标题`, "\x10") + reader := strings.NewReader(data) + + var x myxml + + decoder := NewDecoder(reader) + err := decoder.Decode(&x) + if err != nil { + t.Error(err) + return + } + if x.Title != expected { + t.Errorf("Incorrect entry title, expected: %s, got: %s", expected, x.Title) + } +} + +func TestWindows251WithIllegalCharacters(t *testing.T) { + type myxml struct { + XMLName xml.Name `xml:"rss"` + Version string `xml:"version,attr"` + Title string `xml:"title"` + } + + expected := "Title & 中文标题" + data := fmt.Sprintf(`Title & 中文%s标题`, "\x10") + reader := strings.NewReader(data) + var x myxml - decoder := NewDecoder(strings.NewReader(data)) + decoder := NewDecoder(reader) err := decoder.Decode(&x) if err != nil { t.Error(err) + return + } + if x.Title != expected { + t.Errorf("Incorrect entry title, expected: %s, got: %s", expected, x.Title) + } +} + +func TestIllegalEncodingField(t *testing.T) { + type myxml struct { + XMLName xml.Name `xml:"rss"` + Version string `xml:"version,attr"` + Title string `xml:"title"` + } + + expected := "Title & 中文标题" + data := fmt.Sprintf(`Title & 中文%s标题`, "\x10") + reader := strings.NewReader(data) + + var x myxml + + decoder := NewDecoder(reader) + err := decoder.Decode(&x) + if err != nil { + t.Error(err) + return + } + if x.Title != expected { + t.Errorf("Incorrect entry title, expected: %s, got: %s", expected, x.Title) } } -- cgit v1.2.3