aboutsummaryrefslogtreecommitdiffhomepage
path: root/vendor/github.com/tdewolff/minify/xml
diff options
context:
space:
mode:
authorGravatar Frédéric Guillot <fred@miniflux.net>2017-11-19 21:10:04 -0800
committerGravatar Frédéric Guillot <fred@miniflux.net>2017-11-19 22:01:46 -0800
commit8ffb773f43c8dc54801ca1d111854e7e881c93c9 (patch)
tree38133a2fc612597a75fed1d13e5b4042f58a2b7e /vendor/github.com/tdewolff/minify/xml
First commit
Diffstat (limited to 'vendor/github.com/tdewolff/minify/xml')
-rw-r--r--vendor/github.com/tdewolff/minify/xml/buffer.go84
-rw-r--r--vendor/github.com/tdewolff/minify/xml/buffer_test.go37
-rw-r--r--vendor/github.com/tdewolff/minify/xml/xml.go193
-rw-r--r--vendor/github.com/tdewolff/minify/xml/xml_test.go129
4 files changed, 443 insertions, 0 deletions
diff --git a/vendor/github.com/tdewolff/minify/xml/buffer.go b/vendor/github.com/tdewolff/minify/xml/buffer.go
new file mode 100644
index 0000000..d3ce61c
--- /dev/null
+++ b/vendor/github.com/tdewolff/minify/xml/buffer.go
@@ -0,0 +1,84 @@
+package xml // import "github.com/tdewolff/minify/xml"
+
+import "github.com/tdewolff/parse/xml"
+
// Token is a single token unit holding the raw data, its text content and,
// for attribute tokens, the attribute value.
type Token struct {
	xml.TokenType         // token type as reported by the lexer
	Data          []byte  // raw bytes of the whole token
	Text          []byte  // text content of the token (from Lexer.Text)
	AttrVal       []byte  // attribute value (quotes included); nil for non-attribute tokens
}
+
// TokenBuffer is a buffer that allows for token look-ahead.
type TokenBuffer struct {
	l *xml.Lexer // token source

	buf []Token // buffered tokens; buf[pos:] are not yet shifted
	pos int     // index into buf of the next token returned by Shift
}
+
+// NewTokenBuffer returns a new TokenBuffer.
+func NewTokenBuffer(l *xml.Lexer) *TokenBuffer {
+ return &TokenBuffer{
+ l: l,
+ buf: make([]Token, 0, 8),
+ }
+}
+
+func (z *TokenBuffer) read(t *Token) {
+ t.TokenType, t.Data = z.l.Next()
+ t.Text = z.l.Text()
+ if t.TokenType == xml.AttributeToken {
+ t.AttrVal = z.l.AttrVal()
+ } else {
+ t.AttrVal = nil
+ }
+}
+
// Peek returns the ith element and possibly does an allocation.
// Peeking past an error will panic.
func (z *TokenBuffer) Peek(pos int) *Token {
	pos += z.pos // translate the relative look-ahead index to an absolute index into buf
	if pos >= len(z.buf) {
		// A buffered ErrorToken is sticky: peeking at or past it always
		// returns that same error token instead of reading further.
		if len(z.buf) > 0 && z.buf[len(z.buf)-1].TokenType == xml.ErrorToken {
			return &z.buf[len(z.buf)-1]
		}

		c := cap(z.buf)
		d := len(z.buf) - z.pos // number of buffered-but-unshifted tokens to keep
		p := pos - z.pos + 1    // required peek length
		var buf []Token
		if 2*p > c {
			// Not enough room: allocate a larger buffer (at least double).
			buf = make([]Token, 0, 2*c+p)
		} else {
			// Enough capacity: reuse the existing backing array.
			buf = z.buf
		}
		// Move the kept tokens to the front (safe even when buf aliases z.buf,
		// since the destination starts at index 0 <= z.pos).
		copy(buf[:d], z.buf[z.pos:])

		buf = buf[:p]
		pos -= z.pos
		// Fill the remaining slots by lexing, stopping early on an error token.
		for i := d; i < p; i++ {
			z.read(&buf[i])
			if buf[i].TokenType == xml.ErrorToken {
				buf = buf[:i+1]
				pos = i // clamp the requested position to the error token
				break
			}
		}
		z.pos, z.buf = 0, buf
	}
	return &z.buf[pos]
}
+
// Shift returns the first element and advances position.
func (z *TokenBuffer) Shift() *Token {
	if z.pos >= len(z.buf) {
		// Nothing buffered: reslice within capacity to reuse slot 0 as
		// scratch space for the next token, avoiding an allocation.
		// Note z.pos/len(z.buf) are deliberately left untouched here.
		t := &z.buf[:1][0]
		z.read(t)
		return t
	}
	t := &z.buf[z.pos]
	z.pos++
	return t
}
diff --git a/vendor/github.com/tdewolff/minify/xml/buffer_test.go b/vendor/github.com/tdewolff/minify/xml/buffer_test.go
new file mode 100644
index 0000000..019cb12
--- /dev/null
+++ b/vendor/github.com/tdewolff/minify/xml/buffer_test.go
@@ -0,0 +1,37 @@
+package xml // import "github.com/tdewolff/minify/xml"
+
+import (
+ "bytes"
+ "testing"
+
+ "github.com/tdewolff/parse/xml"
+ "github.com/tdewolff/test"
+)
+
+func TestBuffer(t *testing.T) {
+ // 0 12 3 45 6 7 8 9 0
+ s := `<p><a href="//url">text</a>text<!--comment--></p>`
+ z := NewTokenBuffer(xml.NewLexer(bytes.NewBufferString(s)))
+
+ tok := z.Shift()
+ test.That(t, string(tok.Text) == "p", "first token is <p>")
+ test.That(t, z.pos == 0, "shift first token and restore position")
+ test.That(t, len(z.buf) == 0, "shift first token and restore length")
+
+ test.That(t, string(z.Peek(2).Text) == "href", "third token is href")
+ test.That(t, z.pos == 0, "don't change position after peeking")
+ test.That(t, len(z.buf) == 3, "two tokens after peeking")
+
+ test.That(t, string(z.Peek(8).Text) == "p", "ninth token is <p>")
+ test.That(t, z.pos == 0, "don't change position after peeking")
+ test.That(t, len(z.buf) == 9, "nine tokens after peeking")
+
+ test.That(t, z.Peek(9).TokenType == xml.ErrorToken, "tenth token is an error")
+ test.That(t, z.Peek(9) == z.Peek(10), "tenth and eleventh token are EOF")
+ test.That(t, len(z.buf) == 10, "ten tokens after peeking")
+
+ _ = z.Shift()
+ tok = z.Shift()
+ test.That(t, string(tok.Text) == "a", "third token is <a>")
+ test.That(t, z.pos == 2, "don't change position after peeking")
+}
diff --git a/vendor/github.com/tdewolff/minify/xml/xml.go b/vendor/github.com/tdewolff/minify/xml/xml.go
new file mode 100644
index 0000000..7974484
--- /dev/null
+++ b/vendor/github.com/tdewolff/minify/xml/xml.go
@@ -0,0 +1,193 @@
+// Package xml minifies XML1.0 following the specifications at http://www.w3.org/TR/xml/.
+package xml // import "github.com/tdewolff/minify/xml"
+
+import (
+ "io"
+
+ "github.com/tdewolff/minify"
+ "github.com/tdewolff/parse"
+ "github.com/tdewolff/parse/xml"
+)
+
var (
	isBytes    = []byte("=")  // written between an attribute name and its value
	spaceBytes = []byte(" ")  // written before each attribute
	voidBytes  = []byte("/>") // replaces an empty start-tag/end-tag pair
)
+
+////////////////////////////////////////////////////////////////
+
// DefaultMinifier is the default minifier.
var DefaultMinifier = &Minifier{}

// Minifier is an XML minifier.
type Minifier struct {
	// KeepWhitespace, when set, prevents trailing whitespace in text from
	// being trimmed before start and end tags (see the TextToken handling).
	KeepWhitespace bool
}
+
// Minify minifies XML data, it reads from r and writes to w.
// It delegates to DefaultMinifier.
func Minify(m *minify.M, w io.Writer, r io.Reader, params map[string]string) error {
	return DefaultMinifier.Minify(m, w, r, params)
}
+
// Minify minifies XML data, it reads from r and writes to w.
// It collapses whitespace in text, shortens attribute quoting, converts
// CDATA to plain text when shorter, drops comments (the lexer never emits
// them here) and collapses empty element pairs into void tags.
func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
	omitSpace := true // on true the next text token must not start with a space

	// Scratch buffer reused for escaping attribute values and CDATA content.
	attrByteBuffer := make([]byte, 0, 64)

	l := xml.NewLexer(r)
	defer l.Restore()

	tb := NewTokenBuffer(l)
	for {
		t := *tb.Shift()
		if t.TokenType == xml.CDATAToken {
			// Replace a CDATA section by escaped plain text when that is shorter.
			if text, useText := xml.EscapeCDATAVal(&attrByteBuffer, t.Text); useText {
				t.TokenType = xml.TextToken
				t.Data = text
			}
		}
		switch t.TokenType {
		case xml.ErrorToken:
			if l.Err() == io.EOF {
				return nil // clean end of input
			}
			return l.Err()
		case xml.DOCTYPEToken:
			if _, err := w.Write(t.Data); err != nil {
				return err
			}
		case xml.CDATAToken:
			if _, err := w.Write(t.Data); err != nil {
				return err
			}
			// Trailing whitespace inside CDATA allows the next text token's
			// leading space to be dropped.
			if len(t.Text) > 0 && parse.IsWhitespace(t.Text[len(t.Text)-1]) {
				omitSpace = true
			}
		case xml.TextToken:
			t.Data = parse.ReplaceMultipleWhitespace(t.Data)

			// whitespace removal; trim left
			if omitSpace && (t.Data[0] == ' ' || t.Data[0] == '\n') {
				t.Data = t.Data[1:]
			}

			// whitespace removal; trim right
			// The trailing space is only removable once we know what follows,
			// so look ahead with Peek without consuming tokens.
			omitSpace = false
			if len(t.Data) == 0 {
				omitSpace = true
			} else if t.Data[len(t.Data)-1] == ' ' || t.Data[len(t.Data)-1] == '\n' {
				omitSpace = true
				i := 0
				for {
					next := tb.Peek(i)
					// trim if EOF, text token with whitespace begin or block token
					if next.TokenType == xml.ErrorToken {
						t.Data = t.Data[:len(t.Data)-1]
						omitSpace = false
						break
					} else if next.TokenType == xml.TextToken {
						// this only happens when a comment, doctype, cdata startpi tag was in between
						// remove if the text token starts with a whitespace
						if len(next.Data) > 0 && parse.IsWhitespace(next.Data[0]) {
							t.Data = t.Data[:len(t.Data)-1]
							omitSpace = false
						}
						break
					} else if next.TokenType == xml.CDATAToken {
						if len(next.Text) > 0 && parse.IsWhitespace(next.Text[0]) {
							t.Data = t.Data[:len(t.Data)-1]
							omitSpace = false
						}
						break
					} else if next.TokenType == xml.StartTagToken || next.TokenType == xml.EndTagToken {
						if !o.KeepWhitespace {
							t.Data = t.Data[:len(t.Data)-1]
							omitSpace = false
						}
						break
					}
					i++
				}
			}

			if _, err := w.Write(t.Data); err != nil {
				return err
			}
		case xml.StartTagToken:
			if o.KeepWhitespace {
				omitSpace = false
			}
			if _, err := w.Write(t.Data); err != nil {
				return err
			}
		case xml.StartTagPIToken:
			if _, err := w.Write(t.Data); err != nil {
				return err
			}
		case xml.AttributeToken:
			if _, err := w.Write(spaceBytes); err != nil {
				return err
			}
			if _, err := w.Write(t.Text); err != nil {
				return err
			}
			if _, err := w.Write(isBytes); err != nil {
				return err
			}

			if len(t.AttrVal) < 2 {
				// Unquoted or empty attribute value; write as-is.
				if _, err := w.Write(t.AttrVal); err != nil {
					return err
				}
			} else {
				// prefer single or double quotes depending on what occurs more often in value
				val := xml.EscapeAttrVal(&attrByteBuffer, t.AttrVal[1:len(t.AttrVal)-1])
				if _, err := w.Write(val); err != nil {
					return err
				}
			}
		case xml.StartTagCloseToken:
			// Look ahead past optional all-whitespace text to detect an
			// immediately following end tag.
			next := tb.Peek(0)
			skipExtra := false
			if next.TokenType == xml.TextToken && parse.IsAllWhitespace(next.Data) {
				next = tb.Peek(1)
				skipExtra = true
			}
			if next.TokenType == xml.EndTagToken {
				// collapse empty tags to single void tag
				tb.Shift()
				if skipExtra {
					tb.Shift()
				}
				if _, err := w.Write(voidBytes); err != nil {
					return err
				}
			} else {
				if _, err := w.Write(t.Text); err != nil {
					return err
				}
			}
		case xml.StartTagCloseVoidToken:
			if _, err := w.Write(t.Text); err != nil {
				return err
			}
		case xml.StartTagClosePIToken:
			if _, err := w.Write(t.Text); err != nil {
				return err
			}
		case xml.EndTagToken:
			if o.KeepWhitespace {
				omitSpace = false
			}
			// Strip whitespace inside the end tag: "</name >" -> "</name>".
			if len(t.Data) > 3+len(t.Text) {
				t.Data[2+len(t.Text)] = '>'
				t.Data = t.Data[:3+len(t.Text)]
			}
			if _, err := w.Write(t.Data); err != nil {
				return err
			}
		}
	}
}
diff --git a/vendor/github.com/tdewolff/minify/xml/xml_test.go b/vendor/github.com/tdewolff/minify/xml/xml_test.go
new file mode 100644
index 0000000..5d99809
--- /dev/null
+++ b/vendor/github.com/tdewolff/minify/xml/xml_test.go
@@ -0,0 +1,129 @@
+package xml // import "github.com/tdewolff/minify/xml"
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "regexp"
+ "testing"
+
+ "github.com/tdewolff/minify"
+ "github.com/tdewolff/test"
+)
+
+func TestXML(t *testing.T) {
+ xmlTests := []struct {
+ xml string
+ expected string
+ }{
+ {"<!-- comment -->", ""},
+ {"<A>x</A>", "<A>x</A>"},
+ {"<a><b>x</b></a>", "<a><b>x</b></a>"},
+ {"<a><b>x\ny</b></a>", "<a><b>x\ny</b></a>"},
+ {"<a> <![CDATA[ a ]]> </a>", "<a>a</a>"},
+ {"<a >a</a >", "<a>a</a>"},
+ {"<?xml version=\"1.0\" ?>", "<?xml version=\"1.0\"?>"},
+ {"<x></x>", "<x/>"},
+ {"<x> </x>", "<x/>"},
+ {"<x a=\"b\"></x>", "<x a=\"b\"/>"},
+ {"<x a=\"\"></x>", "<x a=\"\"/>"},
+ {"<x a=a></x>", "<x a=a/>"},
+ {"<x a=\" a \n\r\t b \"/>", "<x a=\" a b \"/>"},
+ {"<x a=\"&apos;b&quot;\"></x>", "<x a=\"'b&#34;\"/>"},
+ {"<x a=\"&quot;&quot;'\"></x>", "<x a='\"\"&#39;'/>"},
+ {"<!DOCTYPE foo SYSTEM \"Foo.dtd\">", "<!DOCTYPE foo SYSTEM \"Foo.dtd\">"},
+ {"text <!--comment--> text", "text text"},
+ {"text\n<!--comment-->\ntext", "text\ntext"},
+ {"<!doctype html>", "<!doctype html=>"}, // bad formatted, doctype must be uppercase and html must have attribute value
+ {"<x>\n<!--y-->\n</x>", "<x></x>"},
+ {"<style>lala{color:red}</style>", "<style>lala{color:red}</style>"},
+ {`cats and dogs `, `cats and dogs`},
+
+ {`</0`, `</0`}, // go fuzz
+ }
+
+ m := minify.New()
+ for _, tt := range xmlTests {
+ t.Run(tt.xml, func(t *testing.T) {
+ r := bytes.NewBufferString(tt.xml)
+ w := &bytes.Buffer{}
+ err := Minify(m, w, r, nil)
+ test.Minify(t, tt.xml, err, w.String(), tt.expected)
+ })
+ }
+}
+
+func TestXMLKeepWhitespace(t *testing.T) {
+ xmlTests := []struct {
+ xml string
+ expected string
+ }{
+ {`cats and dogs `, `cats and dogs`},
+ {` <div> <i> test </i> <b> test </b> </div> `, `<div> <i> test </i> <b> test </b> </div>`},
+ {"text\n<!--comment-->\ntext", "text\ntext"},
+ {"text\n<!--comment-->text<!--comment--> text", "text\ntext text"},
+ {"<x>\n<!--y-->\n</x>", "<x>\n</x>"},
+ {"<style>lala{color:red}</style>", "<style>lala{color:red}</style>"},
+ {"<x> <?xml?> </x>", "<x><?xml?> </x>"},
+ {"<x> <![CDATA[ x ]]> </x>", "<x> x </x>"},
+ {"<x> <![CDATA[ <<<<< ]]> </x>", "<x><![CDATA[ <<<<< ]]></x>"},
+ }
+
+ m := minify.New()
+ xmlMinifier := &Minifier{KeepWhitespace: true}
+ for _, tt := range xmlTests {
+ t.Run(tt.xml, func(t *testing.T) {
+ r := bytes.NewBufferString(tt.xml)
+ w := &bytes.Buffer{}
+ err := xmlMinifier.Minify(m, w, r, nil)
+ test.Minify(t, tt.xml, err, w.String(), tt.expected)
+ })
+ }
+}
+
// TestReaderErrors checks that an error from the underlying reader is
// returned immediately by Minify.
func TestReaderErrors(t *testing.T) {
	r := test.NewErrorReader(0) // fails on the very first read
	w := &bytes.Buffer{}
	m := minify.New()
	err := Minify(m, w, r, nil)
	test.T(t, err, test.ErrPlain, "return error at first read")
}
+
+func TestWriterErrors(t *testing.T) {
+ errorTests := []struct {
+ xml string
+ n []int
+ }{
+ {`<!DOCTYPE foo>`, []int{0}},
+ {`<?xml?>`, []int{0, 1}},
+ {`<a x=y z="val">`, []int{0, 1, 2, 3, 4, 8, 9}},
+ {`<foo/>`, []int{1}},
+ {`</foo>`, []int{0}},
+ {`<foo></foo>`, []int{1}},
+ {`<![CDATA[data<<<<<]]>`, []int{0}},
+ {`text`, []int{0}},
+ }
+
+ m := minify.New()
+ for _, tt := range errorTests {
+ for _, n := range tt.n {
+ t.Run(fmt.Sprint(tt.xml, " ", tt.n), func(t *testing.T) {
+ r := bytes.NewBufferString(tt.xml)
+ w := test.NewErrorWriter(n)
+ err := Minify(m, w, r, nil)
+ test.T(t, err, test.ErrPlain)
+ })
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////
+
// ExampleMinify registers the XML minifier for "/xml"- and "+xml"-suffixed
// mediatypes and minifies stdin to stdout.
func ExampleMinify() {
	m := minify.New()
	m.AddFuncRegexp(regexp.MustCompile("[/+]xml$"), Minify)

	if err := m.Minify("text/xml", os.Stdout, os.Stdin); err != nil {
		panic(err)
	}
}