diff options
author | 2017-11-19 21:10:04 -0800 | |
---|---|---|
committer | 2017-11-19 22:01:46 -0800 | |
commit | 8ffb773f43c8dc54801ca1d111854e7e881c93c9 (patch) | |
tree | 38133a2fc612597a75fed1d13e5b4042f58a2b7e /vendor/github.com/tdewolff/minify/xml |
First commit
Diffstat (limited to 'vendor/github.com/tdewolff/minify/xml')
-rw-r--r-- | vendor/github.com/tdewolff/minify/xml/buffer.go | 84 | ||||
-rw-r--r-- | vendor/github.com/tdewolff/minify/xml/buffer_test.go | 37 | ||||
-rw-r--r-- | vendor/github.com/tdewolff/minify/xml/xml.go | 193 | ||||
-rw-r--r-- | vendor/github.com/tdewolff/minify/xml/xml_test.go | 129 |
4 files changed, 443 insertions, 0 deletions
diff --git a/vendor/github.com/tdewolff/minify/xml/buffer.go b/vendor/github.com/tdewolff/minify/xml/buffer.go new file mode 100644 index 0000000..d3ce61c --- /dev/null +++ b/vendor/github.com/tdewolff/minify/xml/buffer.go @@ -0,0 +1,84 @@ +package xml // import "github.com/tdewolff/minify/xml" + +import "github.com/tdewolff/parse/xml" + +// Token is a single token unit with its raw data, its text and an attribute value (if given). +type Token struct { + xml.TokenType + Data []byte + Text []byte + AttrVal []byte +} + +// TokenBuffer is a buffer that allows for token look-ahead. +type TokenBuffer struct { + l *xml.Lexer + + buf []Token + pos int +} + +// NewTokenBuffer returns a new TokenBuffer. +func NewTokenBuffer(l *xml.Lexer) *TokenBuffer { + return &TokenBuffer{ + l: l, + buf: make([]Token, 0, 8), + } +} + +func (z *TokenBuffer) read(t *Token) { + t.TokenType, t.Data = z.l.Next() + t.Text = z.l.Text() + if t.TokenType == xml.AttributeToken { + t.AttrVal = z.l.AttrVal() + } else { + t.AttrVal = nil + } +} + +// Peek returns the ith element after the current position and possibly does an allocation. +// Peeking past an error returns the error token itself. +func (z *TokenBuffer) Peek(pos int) *Token { + pos += z.pos + if pos >= len(z.buf) { + if len(z.buf) > 0 && z.buf[len(z.buf)-1].TokenType == xml.ErrorToken { + return &z.buf[len(z.buf)-1] + } + + c := cap(z.buf) + d := len(z.buf) - z.pos + p := pos - z.pos + 1 // required peek length + var buf []Token + if 2*p > c { + buf = make([]Token, 0, 2*c+p) + } else { + buf = z.buf + } + copy(buf[:d], z.buf[z.pos:]) + + buf = buf[:p] + pos -= z.pos + for i := d; i < p; i++ { + z.read(&buf[i]) + if buf[i].TokenType == xml.ErrorToken { + buf = buf[:i+1] + pos = i + break + } + } + z.pos, z.buf = 0, buf + } + return &z.buf[pos] +} + +// Shift returns the first element and advances position. 
+func (z *TokenBuffer) Shift() *Token { + if z.pos >= len(z.buf) { + t := &z.buf[:1][0] + z.read(t) + return t + } + t := &z.buf[z.pos] + z.pos++ + return t +} diff --git a/vendor/github.com/tdewolff/minify/xml/buffer_test.go b/vendor/github.com/tdewolff/minify/xml/buffer_test.go new file mode 100644 index 0000000..019cb12 --- /dev/null +++ b/vendor/github.com/tdewolff/minify/xml/buffer_test.go @@ -0,0 +1,37 @@ +package xml // import "github.com/tdewolff/minify/xml" + +import ( + "bytes" + "testing" + + "github.com/tdewolff/parse/xml" + "github.com/tdewolff/test" +) + +func TestBuffer(t *testing.T) { + // 0 12 3 45 6 7 8 9 0 + s := `<p><a href="//url">text</a>text<!--comment--></p>` + z := NewTokenBuffer(xml.NewLexer(bytes.NewBufferString(s))) + + tok := z.Shift() + test.That(t, string(tok.Text) == "p", "first token is <p>") + test.That(t, z.pos == 0, "shift first token and restore position") + test.That(t, len(z.buf) == 0, "shift first token and restore length") + + test.That(t, string(z.Peek(2).Text) == "href", "third token is href") + test.That(t, z.pos == 0, "don't change position after peeking") + test.That(t, len(z.buf) == 3, "two tokens after peeking") + + test.That(t, string(z.Peek(8).Text) == "p", "ninth token is <p>") + test.That(t, z.pos == 0, "don't change position after peeking") + test.That(t, len(z.buf) == 9, "nine tokens after peeking") + + test.That(t, z.Peek(9).TokenType == xml.ErrorToken, "tenth token is an error") + test.That(t, z.Peek(9) == z.Peek(10), "tenth and eleventh token are EOF") + test.That(t, len(z.buf) == 10, "ten tokens after peeking") + + _ = z.Shift() + tok = z.Shift() + test.That(t, string(tok.Text) == "a", "third token is <a>") + test.That(t, z.pos == 2, "don't change position after peeking") +} diff --git a/vendor/github.com/tdewolff/minify/xml/xml.go b/vendor/github.com/tdewolff/minify/xml/xml.go new file mode 100644 index 0000000..7974484 --- /dev/null +++ b/vendor/github.com/tdewolff/minify/xml/xml.go @@ -0,0 +1,193 @@ +// 
Package xml minifies XML1.0 following the specifications at http://www.w3.org/TR/xml/. +package xml // import "github.com/tdewolff/minify/xml" + +import ( + "io" + + "github.com/tdewolff/minify" + "github.com/tdewolff/parse" + "github.com/tdewolff/parse/xml" +) + +var ( + isBytes = []byte("=") + spaceBytes = []byte(" ") + voidBytes = []byte("/>") +) + +//////////////////////////////////////////////////////////////// + +// DefaultMinifier is the default minifier. +var DefaultMinifier = &Minifier{} + +// Minifier is an XML minifier. +type Minifier struct { + KeepWhitespace bool +} + +// Minify minifies XML data, it reads from r and writes to w. +func Minify(m *minify.M, w io.Writer, r io.Reader, params map[string]string) error { + return DefaultMinifier.Minify(m, w, r, params) +} + +// Minify minifies XML data, it reads from r and writes to w. +func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]string) error { + omitSpace := true // on true the next text token must not start with a space + + attrByteBuffer := make([]byte, 0, 64) + + l := xml.NewLexer(r) + defer l.Restore() + + tb := NewTokenBuffer(l) + for { + t := *tb.Shift() + if t.TokenType == xml.CDATAToken { + if text, useText := xml.EscapeCDATAVal(&attrByteBuffer, t.Text); useText { + t.TokenType = xml.TextToken + t.Data = text + } + } + switch t.TokenType { + case xml.ErrorToken: + if l.Err() == io.EOF { + return nil + } + return l.Err() + case xml.DOCTYPEToken: + if _, err := w.Write(t.Data); err != nil { + return err + } + case xml.CDATAToken: + if _, err := w.Write(t.Data); err != nil { + return err + } + if len(t.Text) > 0 && parse.IsWhitespace(t.Text[len(t.Text)-1]) { + omitSpace = true + } + case xml.TextToken: + t.Data = parse.ReplaceMultipleWhitespace(t.Data) + + // whitespace removal; trim left + if omitSpace && (t.Data[0] == ' ' || t.Data[0] == '\n') { + t.Data = t.Data[1:] + } + + // whitespace removal; trim right + omitSpace = false + if len(t.Data) == 0 { + omitSpace = 
true + } else if t.Data[len(t.Data)-1] == ' ' || t.Data[len(t.Data)-1] == '\n' { + omitSpace = true + i := 0 + for { + next := tb.Peek(i) + // trim if followed by EOF, by a text or CDATA token that begins with whitespace, or by a start/end tag (unless KeepWhitespace is set) + if next.TokenType == xml.ErrorToken { + t.Data = t.Data[:len(t.Data)-1] + omitSpace = false + break + } else if next.TokenType == xml.TextToken { + // this only happens when a comment, doctype, cdata or start-PI tag was in between + // remove if the text token starts with a whitespace + if len(next.Data) > 0 && parse.IsWhitespace(next.Data[0]) { + t.Data = t.Data[:len(t.Data)-1] + omitSpace = false + } + break + } else if next.TokenType == xml.CDATAToken { + if len(next.Text) > 0 && parse.IsWhitespace(next.Text[0]) { + t.Data = t.Data[:len(t.Data)-1] + omitSpace = false + } + break + } else if next.TokenType == xml.StartTagToken || next.TokenType == xml.EndTagToken { + if !o.KeepWhitespace { + t.Data = t.Data[:len(t.Data)-1] + omitSpace = false + } + break + } + i++ + } + } + + if _, err := w.Write(t.Data); err != nil { + return err + } + case xml.StartTagToken: + if o.KeepWhitespace { + omitSpace = false + } + if _, err := w.Write(t.Data); err != nil { + return err + } + case xml.StartTagPIToken: + if _, err := w.Write(t.Data); err != nil { + return err + } + case xml.AttributeToken: + if _, err := w.Write(spaceBytes); err != nil { + return err + } + if _, err := w.Write(t.Text); err != nil { + return err + } + if _, err := w.Write(isBytes); err != nil { + return err + } + + if len(t.AttrVal) < 2 { + if _, err := w.Write(t.AttrVal); err != nil { + return err + } + } else { + // prefer single or double quotes depending on what occurs more often in value + val := xml.EscapeAttrVal(&attrByteBuffer, t.AttrVal[1:len(t.AttrVal)-1]) + if _, err := w.Write(val); err != nil { + return err + } + } + case xml.StartTagCloseToken: + next := tb.Peek(0) + skipExtra := false + if next.TokenType == xml.TextToken && parse.IsAllWhitespace(next.Data) { + next = tb.Peek(1) + skipExtra = 
true + } + if next.TokenType == xml.EndTagToken { + // collapse empty tags to single void tag + tb.Shift() + if skipExtra { + tb.Shift() + } + if _, err := w.Write(voidBytes); err != nil { + return err + } + } else { + if _, err := w.Write(t.Text); err != nil { + return err + } + } + case xml.StartTagCloseVoidToken: + if _, err := w.Write(t.Text); err != nil { + return err + } + case xml.StartTagClosePIToken: + if _, err := w.Write(t.Text); err != nil { + return err + } + case xml.EndTagToken: + if o.KeepWhitespace { + omitSpace = false + } + if len(t.Data) > 3+len(t.Text) { + t.Data[2+len(t.Text)] = '>' + t.Data = t.Data[:3+len(t.Text)] + } + if _, err := w.Write(t.Data); err != nil { + return err + } + } + } +} diff --git a/vendor/github.com/tdewolff/minify/xml/xml_test.go b/vendor/github.com/tdewolff/minify/xml/xml_test.go new file mode 100644 index 0000000..5d99809 --- /dev/null +++ b/vendor/github.com/tdewolff/minify/xml/xml_test.go @@ -0,0 +1,129 @@ +package xml // import "github.com/tdewolff/minify/xml" + +import ( + "bytes" + "fmt" + "os" + "regexp" + "testing" + + "github.com/tdewolff/minify" + "github.com/tdewolff/test" +) + +func TestXML(t *testing.T) { + xmlTests := []struct { + xml string + expected string + }{ + {"<!-- comment -->", ""}, + {"<A>x</A>", "<A>x</A>"}, + {"<a><b>x</b></a>", "<a><b>x</b></a>"}, + {"<a><b>x\ny</b></a>", "<a><b>x\ny</b></a>"}, + {"<a> <![CDATA[ a ]]> </a>", "<a>a</a>"}, + {"<a >a</a >", "<a>a</a>"}, + {"<?xml version=\"1.0\" ?>", "<?xml version=\"1.0\"?>"}, + {"<x></x>", "<x/>"}, + {"<x> </x>", "<x/>"}, + {"<x a=\"b\"></x>", "<x a=\"b\"/>"}, + {"<x a=\"\"></x>", "<x a=\"\"/>"}, + {"<x a=a></x>", "<x a=a/>"}, + {"<x a=\" a \n\r\t b \"/>", "<x a=\" a b \"/>"}, + {"<x a=\"'b"\"></x>", "<x a=\"'b"\"/>"}, + {"<x a=\"""'\"></x>", "<x a='\"\"''/>"}, + {"<!DOCTYPE foo SYSTEM \"Foo.dtd\">", "<!DOCTYPE foo SYSTEM \"Foo.dtd\">"}, + {"text <!--comment--> text", "text text"}, + {"text\n<!--comment-->\ntext", "text\ntext"}, + {"<!doctype 
html>", "<!doctype html=>"}, // bad formatted, doctype must be uppercase and html must have attribute value + {"<x>\n<!--y-->\n</x>", "<x></x>"}, + {"<style>lala{color:red}</style>", "<style>lala{color:red}</style>"}, + {`cats and dogs `, `cats and dogs`}, + + {`</0`, `</0`}, // go fuzz + } + + m := minify.New() + for _, tt := range xmlTests { + t.Run(tt.xml, func(t *testing.T) { + r := bytes.NewBufferString(tt.xml) + w := &bytes.Buffer{} + err := Minify(m, w, r, nil) + test.Minify(t, tt.xml, err, w.String(), tt.expected) + }) + } +} + +func TestXMLKeepWhitespace(t *testing.T) { + xmlTests := []struct { + xml string + expected string + }{ + {`cats and dogs `, `cats and dogs`}, + {` <div> <i> test </i> <b> test </b> </div> `, `<div> <i> test </i> <b> test </b> </div>`}, + {"text\n<!--comment-->\ntext", "text\ntext"}, + {"text\n<!--comment-->text<!--comment--> text", "text\ntext text"}, + {"<x>\n<!--y-->\n</x>", "<x>\n</x>"}, + {"<style>lala{color:red}</style>", "<style>lala{color:red}</style>"}, + {"<x> <?xml?> </x>", "<x><?xml?> </x>"}, + {"<x> <![CDATA[ x ]]> </x>", "<x> x </x>"}, + {"<x> <![CDATA[ <<<<< ]]> </x>", "<x><![CDATA[ <<<<< ]]></x>"}, + } + + m := minify.New() + xmlMinifier := &Minifier{KeepWhitespace: true} + for _, tt := range xmlTests { + t.Run(tt.xml, func(t *testing.T) { + r := bytes.NewBufferString(tt.xml) + w := &bytes.Buffer{} + err := xmlMinifier.Minify(m, w, r, nil) + test.Minify(t, tt.xml, err, w.String(), tt.expected) + }) + } +} + +func TestReaderErrors(t *testing.T) { + r := test.NewErrorReader(0) + w := &bytes.Buffer{} + m := minify.New() + err := Minify(m, w, r, nil) + test.T(t, err, test.ErrPlain, "return error at first read") +} + +func TestWriterErrors(t *testing.T) { + errorTests := []struct { + xml string + n []int + }{ + {`<!DOCTYPE foo>`, []int{0}}, + {`<?xml?>`, []int{0, 1}}, + {`<a x=y z="val">`, []int{0, 1, 2, 3, 4, 8, 9}}, + {`<foo/>`, []int{1}}, + {`</foo>`, []int{0}}, + {`<foo></foo>`, []int{1}}, + {`<![CDATA[data<<<<<]]>`, 
[]int{0}}, + {`text`, []int{0}}, + } + + m := minify.New() + for _, tt := range errorTests { + for _, n := range tt.n { + t.Run(fmt.Sprint(tt.xml, " ", tt.n), func(t *testing.T) { + r := bytes.NewBufferString(tt.xml) + w := test.NewErrorWriter(n) + err := Minify(m, w, r, nil) + test.T(t, err, test.ErrPlain) + }) + } + } +} + +//////////////////////////////////////////////////////////////// + +func ExampleMinify() { + m := minify.New() + m.AddFuncRegexp(regexp.MustCompile("[/+]xml$"), Minify) + + if err := m.Minify("text/xml", os.Stdout, os.Stdin); err != nil { + panic(err) + } +} |