aboutsummaryrefslogtreecommitdiffhomepage
path: root/vendor/github.com/tdewolff/parse/html
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/tdewolff/parse/html')
-rw-r--r--vendor/github.com/tdewolff/parse/html/README.md98
-rw-r--r--vendor/github.com/tdewolff/parse/html/hash.go831
-rw-r--r--vendor/github.com/tdewolff/parse/html/hash_test.go58
-rw-r--r--vendor/github.com/tdewolff/parse/html/lex.go485
-rw-r--r--vendor/github.com/tdewolff/parse/html/lex_test.go262
-rw-r--r--vendor/github.com/tdewolff/parse/html/util.go129
-rw-r--r--vendor/github.com/tdewolff/parse/html/util_test.go43
7 files changed, 1906 insertions, 0 deletions
diff --git a/vendor/github.com/tdewolff/parse/html/README.md b/vendor/github.com/tdewolff/parse/html/README.md
new file mode 100644
index 0000000..edca629
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/html/README.md
@@ -0,0 +1,98 @@
+# HTML [![GoDoc](http://godoc.org/github.com/tdewolff/parse/html?status.svg)](http://godoc.org/github.com/tdewolff/parse/html) [![GoCover](http://gocover.io/_badge/github.com/tdewolff/parse/html)](http://gocover.io/github.com/tdewolff/parse/html)
+
+This package is an HTML5 lexer written in [Go][1]. It follows the specification at [The HTML syntax](http://www.w3.org/TR/html5/syntax.html). The lexer takes an io.Reader and converts it into tokens until the EOF.
+
+## Installation
+Run the following command
+
+ go get github.com/tdewolff/parse/html
+
+or add the following import and run project with `go get`
+
+ import "github.com/tdewolff/parse/html"
+
+## Lexer
+### Usage
+The following initializes a new Lexer with io.Reader `r`:
+``` go
+l := html.NewLexer(r)
+```
+
+To tokenize until EOF or an error, use:
+``` go
+for {
+ tt, data := l.Next()
+ switch tt {
+ case html.ErrorToken:
+ // error or EOF set in l.Err()
+ return
+ case html.StartTagToken:
+ // ...
+ for {
+ ttAttr, dataAttr := l.Next()
+ if ttAttr != html.AttributeToken {
+ break
+ }
+ // ...
+ }
+ // ...
+ }
+}
+```
+
+All tokens:
+``` go
+ErrorToken TokenType = iota // extra token when errors occur
+CommentToken
+DoctypeToken
+StartTagToken
+StartTagCloseToken
+StartTagVoidToken
+EndTagToken
+AttributeToken
+TextToken
+SvgToken
+MathToken
+```
+
+### Examples
+``` go
+package main
+
+import (
+ "fmt"
+ "io"
+ "os"
+
+ "github.com/tdewolff/parse/html"
+)
+
+// Tokenize HTML from stdin.
+func main() {
+ l := html.NewLexer(os.Stdin)
+ for {
+ tt, data := l.Next()
+ switch tt {
+ case html.ErrorToken:
+ if l.Err() != io.EOF {
+ fmt.Println("Error on line", l.Line(), ":", l.Err())
+ }
+ return
+ case html.StartTagToken:
+ fmt.Println("Tag", string(data))
+ for {
+ ttAttr, dataAttr := l.Next()
+ if ttAttr != html.AttributeToken {
+ break
+ }
+
+ key := dataAttr
+ val := l.AttrVal()
+ fmt.Println("Attribute", string(key), "=", string(val))
+ }
+ // ...
+ }
+ }
+}
+```
+
+## License
+Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md).
+
+[1]: http://golang.org/ "Go Language"
diff --git a/vendor/github.com/tdewolff/parse/html/hash.go b/vendor/github.com/tdewolff/parse/html/hash.go
new file mode 100644
index 0000000..382e5c5
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/html/hash.go
@@ -0,0 +1,831 @@
+package html
+
+// generated by hasher -type=Hash -file=hash.go; DO NOT EDIT, except for adding more constants to the list and rerun go generate
+
+// uses github.com/tdewolff/hasher
+//go:generate hasher -type=Hash -file=hash.go
+
+// Hash defines perfect hashes for a predefined list of strings.
+// As decoded by the String method, the low 8 bits of a value hold the length
+// of the name and the remaining upper bits hold its offset into _Hash_text.
+type Hash uint32
+
+// Unique hash definitions to be used instead of strings
+const (
+ A Hash = 0x1 // a
+ Abbr Hash = 0x4 // abbr
+ Accept Hash = 0x3206 // accept
+ Accept_Charset Hash = 0x320e // accept-charset
+ Accesskey Hash = 0x4409 // accesskey
+ Acronym Hash = 0xbb07 // acronym
+ Action Hash = 0x2ba06 // action
+ Address Hash = 0x67e07 // address
+ Align Hash = 0x1605 // align
+ Alink Hash = 0xd205 // alink
+ Allowfullscreen Hash = 0x23d0f // allowfullscreen
+ Alt Hash = 0xee03 // alt
+ Annotation Hash = 0x2070a // annotation
+ AnnotationXml Hash = 0x2070d // annotationXml
+ Applet Hash = 0x14506 // applet
+ Area Hash = 0x38d04 // area
+ Article Hash = 0x40e07 // article
+ Aside Hash = 0x8305 // aside
+ Async Hash = 0xfa05 // async
+ Audio Hash = 0x11605 // audio
+ Autocomplete Hash = 0x12e0c // autocomplete
+ Autofocus Hash = 0x13a09 // autofocus
+ Autoplay Hash = 0x14f08 // autoplay
+ Axis Hash = 0x15704 // axis
+ B Hash = 0x101 // b
+ Background Hash = 0x1e0a // background
+ Base Hash = 0x45404 // base
+ Basefont Hash = 0x45408 // basefont
+ Bdi Hash = 0xcb03 // bdi
+ Bdo Hash = 0x18403 // bdo
+ Bgcolor Hash = 0x19707 // bgcolor
+ Bgsound Hash = 0x19e07 // bgsound
+ Big Hash = 0x1a603 // big
+ Blink Hash = 0x1a905 // blink
+ Blockquote Hash = 0x1ae0a // blockquote
+ Body Hash = 0x4004 // body
+ Border Hash = 0x33806 // border
+ Br Hash = 0x202 // br
+ Button Hash = 0x1b806 // button
+ Canvas Hash = 0x7f06 // canvas
+ Caption Hash = 0x27f07 // caption
+ Center Hash = 0x62a06 // center
+ Challenge Hash = 0x1e509 // challenge
+ Charset Hash = 0x3907 // charset
+ Checked Hash = 0x3b407 // checked
+ Cite Hash = 0xfe04 // cite
+ Class Hash = 0x1c305 // class
+ Classid Hash = 0x1c307 // classid
+ Clear Hash = 0x41205 // clear
+ Code Hash = 0x1d604 // code
+ Codebase Hash = 0x45008 // codebase
+ Codetype Hash = 0x1d608 // codetype
+ Col Hash = 0x19903 // col
+ Colgroup Hash = 0x1ee08 // colgroup
+ Color Hash = 0x19905 // color
+ Cols Hash = 0x20204 // cols
+ Colspan Hash = 0x20207 // colspan
+ Command Hash = 0x21407 // command
+ Compact Hash = 0x21b07 // compact
+ Content Hash = 0x4a907 // content
+ Contenteditable Hash = 0x4a90f // contenteditable
+ Contextmenu Hash = 0x3bd0b // contextmenu
+ Controls Hash = 0x22a08 // controls
+ Coords Hash = 0x23606 // coords
+ Crossorigin Hash = 0x25b0b // crossorigin
+ Data Hash = 0x4c004 // data
+ Datalist Hash = 0x4c008 // datalist
+ Datetime Hash = 0x2ea08 // datetime
+ Dd Hash = 0x31602 // dd
+ Declare Hash = 0x8607 // declare
+ Default Hash = 0x5407 // default
+ DefaultChecked Hash = 0x5040e // defaultChecked
+ DefaultMuted Hash = 0x5650c // defaultMuted
+ DefaultSelected Hash = 0x540f // defaultSelected
+ Defer Hash = 0x6205 // defer
+ Del Hash = 0x7203 // del
+ Desc Hash = 0x7c04 // desc
+ Details Hash = 0x9207 // details
+ Dfn Hash = 0xab03 // dfn
+ Dialog Hash = 0xcc06 // dialog
+ Dir Hash = 0xd903 // dir
+ Dirname Hash = 0xd907 // dirname
+ Disabled Hash = 0x10408 // disabled
+ Div Hash = 0x10b03 // div
+ Dl Hash = 0x1a402 // dl
+ Download Hash = 0x48608 // download
+ Draggable Hash = 0x1c909 // draggable
+ Dropzone Hash = 0x41908 // dropzone
+ Dt Hash = 0x60602 // dt
+ Em Hash = 0x6e02 // em
+ Embed Hash = 0x6e05 // embed
+ Enabled Hash = 0x4e07 // enabled
+ Enctype Hash = 0x2cf07 // enctype
+ Face Hash = 0x62804 // face
+ Fieldset Hash = 0x26c08 // fieldset
+ Figcaption Hash = 0x27c0a // figcaption
+ Figure Hash = 0x29006 // figure
+ Font Hash = 0x45804 // font
+ Footer Hash = 0xf106 // footer
+ For Hash = 0x29c03 // for
+ ForeignObject Hash = 0x29c0d // foreignObject
+ Foreignobject Hash = 0x2a90d // foreignobject
+ Form Hash = 0x2b604 // form
+ Formaction Hash = 0x2b60a // formaction
+ Formenctype Hash = 0x2cb0b // formenctype
+ Formmethod Hash = 0x2d60a // formmethod
+ Formnovalidate Hash = 0x2e00e // formnovalidate
+ Formtarget Hash = 0x2f50a // formtarget
+ Frame Hash = 0xa305 // frame
+ Frameborder Hash = 0x3330b // frameborder
+ Frameset Hash = 0xa308 // frameset
+ H1 Hash = 0x19502 // h1
+ H2 Hash = 0x32402 // h2
+ H3 Hash = 0x34902 // h3
+ H4 Hash = 0x38602 // h4
+ H5 Hash = 0x60802 // h5
+ H6 Hash = 0x2ff02 // h6
+ Head Hash = 0x37204 // head
+ Header Hash = 0x37206 // header
+ Headers Hash = 0x37207 // headers
+ Height Hash = 0x30106 // height
+ Hgroup Hash = 0x30906 // hgroup
+ Hidden Hash = 0x31406 // hidden
+ High Hash = 0x32104 // high
+ Hr Hash = 0xaf02 // hr
+ Href Hash = 0xaf04 // href
+ Hreflang Hash = 0xaf08 // hreflang
+ Html Hash = 0x30504 // html
+ Http_Equiv Hash = 0x3260a // http-equiv
+ I Hash = 0x601 // i
+ Icon Hash = 0x4a804 // icon
+ Id Hash = 0x8502 // id
+ Iframe Hash = 0x33206 // iframe
+ Image Hash = 0x33e05 // image
+ Img Hash = 0x34303 // img
+ Inert Hash = 0x55005 // inert
+ Input Hash = 0x47305 // input
+ Ins Hash = 0x26403 // ins
+ Isindex Hash = 0x15907 // isindex
+ Ismap Hash = 0x34b05 // ismap
+ Itemid Hash = 0xff06 // itemid
+ Itemprop Hash = 0x58808 // itemprop
+ Itemref Hash = 0x62207 // itemref
+ Itemscope Hash = 0x35609 // itemscope
+ Itemtype Hash = 0x36008 // itemtype
+ Kbd Hash = 0xca03 // kbd
+ Keygen Hash = 0x4a06 // keygen
+ Keytype Hash = 0x68807 // keytype
+ Kind Hash = 0xd604 // kind
+ Label Hash = 0x7405 // label
+ Lang Hash = 0xb304 // lang
+ Language Hash = 0xb308 // language
+ Legend Hash = 0x1d006 // legend
+ Li Hash = 0x1702 // li
+ Link Hash = 0xd304 // link
+ List Hash = 0x4c404 // list
+ Listing Hash = 0x4c407 // listing
+ Longdesc Hash = 0x7808 // longdesc
+ Loop Hash = 0x12104 // loop
+ Low Hash = 0x23f03 // low
+ Main Hash = 0x1004 // main
+ Malignmark Hash = 0xc10a // malignmark
+ Manifest Hash = 0x65e08 // manifest
+ Map Hash = 0x14403 // map
+ Mark Hash = 0xc704 // mark
+ Marquee Hash = 0x36807 // marquee
+ Math Hash = 0x36f04 // math
+ Max Hash = 0x37e03 // max
+ Maxlength Hash = 0x37e09 // maxlength
+ Media Hash = 0xde05 // media
+ Mediagroup Hash = 0xde0a // mediagroup
+ Menu Hash = 0x3c404 // menu
+ Meta Hash = 0x4d304 // meta
+ Meter Hash = 0x2f005 // meter
+ Method Hash = 0x2da06 // method
+ Mglyph Hash = 0x34406 // mglyph
+ Mi Hash = 0x2c02 // mi
+ Min Hash = 0x2c03 // min
+ Mn Hash = 0x2e302 // mn
+ Mo Hash = 0x4f702 // mo
+ Ms Hash = 0x35902 // ms
+ Mtext Hash = 0x38805 // mtext
+ Multiple Hash = 0x39608 // multiple
+ Muted Hash = 0x39e05 // muted
+ Name Hash = 0xdc04 // name
+ Nav Hash = 0x1303 // nav
+ Nobr Hash = 0x1a04 // nobr
+ Noembed Hash = 0x6c07 // noembed
+ Noframes Hash = 0xa108 // noframes
+ Nohref Hash = 0xad06 // nohref
+ Noresize Hash = 0x24b08 // noresize
+ Noscript Hash = 0x31908 // noscript
+ Noshade Hash = 0x4ff07 // noshade
+ Novalidate Hash = 0x2e40a // novalidate
+ Nowrap Hash = 0x59106 // nowrap
+ Object Hash = 0x2b006 // object
+ Ol Hash = 0x17102 // ol
+ Onabort Hash = 0x1bc07 // onabort
+ Onafterprint Hash = 0x2840c // onafterprint
+ Onbeforeprint Hash = 0x2be0d // onbeforeprint
+ Onbeforeunload Hash = 0x6720e // onbeforeunload
+ Onblur Hash = 0x17e06 // onblur
+ Oncancel Hash = 0x11a08 // oncancel
+ Oncanplay Hash = 0x18609 // oncanplay
+ Oncanplaythrough Hash = 0x18610 // oncanplaythrough
+ Onchange Hash = 0x42f08 // onchange
+ Onclick Hash = 0x6b607 // onclick
+ Onclose Hash = 0x3a307 // onclose
+ Oncontextmenu Hash = 0x3bb0d // oncontextmenu
+ Oncuechange Hash = 0x3c80b // oncuechange
+ Ondblclick Hash = 0x3d30a // ondblclick
+ Ondrag Hash = 0x3dd06 // ondrag
+ Ondragend Hash = 0x3dd09 // ondragend
+ Ondragenter Hash = 0x3e60b // ondragenter
+ Ondragleave Hash = 0x3f10b // ondragleave
+ Ondragover Hash = 0x3fc0a // ondragover
+ Ondragstart Hash = 0x4060b // ondragstart
+ Ondrop Hash = 0x41706 // ondrop
+ Ondurationchange Hash = 0x42710 // ondurationchange
+ Onemptied Hash = 0x41e09 // onemptied
+ Onended Hash = 0x43707 // onended
+ Onerror Hash = 0x43e07 // onerror
+ Onfocus Hash = 0x44507 // onfocus
+ Onhashchange Hash = 0x4650c // onhashchange
+ Oninput Hash = 0x47107 // oninput
+ Oninvalid Hash = 0x47809 // oninvalid
+ Onkeydown Hash = 0x48109 // onkeydown
+ Onkeypress Hash = 0x48e0a // onkeypress
+ Onkeyup Hash = 0x49e07 // onkeyup
+ Onload Hash = 0x4b806 // onload
+ Onloadeddata Hash = 0x4b80c // onloadeddata
+ Onloadedmetadata Hash = 0x4cb10 // onloadedmetadata
+ Onloadstart Hash = 0x4e10b // onloadstart
+ Onmessage Hash = 0x4ec09 // onmessage
+ Onmousedown Hash = 0x4f50b // onmousedown
+ Onmousemove Hash = 0x5120b // onmousemove
+ Onmouseout Hash = 0x51d0a // onmouseout
+ Onmouseover Hash = 0x52a0b // onmouseover
+ Onmouseup Hash = 0x53509 // onmouseup
+ Onmousewheel Hash = 0x53e0c // onmousewheel
+ Onoffline Hash = 0x54a09 // onoffline
+ Ononline Hash = 0x55508 // ononline
+ Onpagehide Hash = 0x55d0a // onpagehide
+ Onpageshow Hash = 0x5710a // onpageshow
+ Onpause Hash = 0x57d07 // onpause
+ Onplay Hash = 0x59c06 // onplay
+ Onplaying Hash = 0x59c09 // onplaying
+ Onpopstate Hash = 0x5a50a // onpopstate
+ Onprogress Hash = 0x5af0a // onprogress
+ Onratechange Hash = 0x5be0c // onratechange
+ Onreset Hash = 0x5ca07 // onreset
+ Onresize Hash = 0x5d108 // onresize
+ Onscroll Hash = 0x5d908 // onscroll
+ Onseeked Hash = 0x5e408 // onseeked
+ Onseeking Hash = 0x5ec09 // onseeking
+ Onselect Hash = 0x5f508 // onselect
+ Onshow Hash = 0x5ff06 // onshow
+ Onstalled Hash = 0x60a09 // onstalled
+ Onstorage Hash = 0x61309 // onstorage
+ Onsubmit Hash = 0x61c08 // onsubmit
+ Onsuspend Hash = 0x63009 // onsuspend
+ Ontimeupdate Hash = 0x4590c // ontimeupdate
+ Onunload Hash = 0x63908 // onunload
+ Onvolumechange Hash = 0x6410e // onvolumechange
+ Onwaiting Hash = 0x64f09 // onwaiting
+ Open Hash = 0x58e04 // open
+ Optgroup Hash = 0x12308 // optgroup
+ Optimum Hash = 0x65807 // optimum
+ Option Hash = 0x66e06 // option
+ Output Hash = 0x52406 // output
+ P Hash = 0xc01 // p
+ Param Hash = 0xc05 // param
+ Pattern Hash = 0x9b07 // pattern
+ Pauseonexit Hash = 0x57f0b // pauseonexit
+ Picture Hash = 0xe707 // picture
+ Ping Hash = 0x12a04 // ping
+ Placeholder Hash = 0x16b0b // placeholder
+ Plaintext Hash = 0x1f509 // plaintext
+ Poster Hash = 0x30e06 // poster
+ Pre Hash = 0x34f03 // pre
+ Preload Hash = 0x34f07 // preload
+ Profile Hash = 0x66707 // profile
+ Progress Hash = 0x5b108 // progress
+ Prompt Hash = 0x59606 // prompt
+ Public Hash = 0x4a406 // public
+ Q Hash = 0x8d01 // q
+ Radiogroup Hash = 0x30a // radiogroup
+ Rb Hash = 0x1d02 // rb
+ Readonly Hash = 0x38e08 // readonly
+ Rel Hash = 0x35003 // rel
+ Required Hash = 0x8b08 // required
+ Rev Hash = 0x29403 // rev
+ Reversed Hash = 0x29408 // reversed
+ Rows Hash = 0x6604 // rows
+ Rowspan Hash = 0x6607 // rowspan
+ Rp Hash = 0x28a02 // rp
+ Rt Hash = 0x1c102 // rt
+ Rtc Hash = 0x1c103 // rtc
+ Ruby Hash = 0xf604 // ruby
+ Rules Hash = 0x17505 // rules
+ S Hash = 0x3d01 // s
+ Samp Hash = 0x9804 // samp
+ Sandbox Hash = 0x16307 // sandbox
+ Scope Hash = 0x35a05 // scope
+ Scoped Hash = 0x35a06 // scoped
+ Script Hash = 0x31b06 // script
+ Scrolling Hash = 0x5db09 // scrolling
+ Seamless Hash = 0x3a808 // seamless
+ Section Hash = 0x17907 // section
+ Select Hash = 0x5f706 // select
+ Selected Hash = 0x5f708 // selected
+ Shape Hash = 0x23105 // shape
+ Size Hash = 0x24f04 // size
+ Sizes Hash = 0x24f05 // sizes
+ Small Hash = 0x23b05 // small
+ Sortable Hash = 0x25308 // sortable
+ Source Hash = 0x26606 // source
+ Spacer Hash = 0x37806 // spacer
+ Span Hash = 0x6904 // span
+ Spellcheck Hash = 0x3af0a // spellcheck
+ Src Hash = 0x44b03 // src
+ Srcdoc Hash = 0x44b06 // srcdoc
+ Srclang Hash = 0x49707 // srclang
+ Srcset Hash = 0x5b806 // srcset
+ Start Hash = 0x40c05 // start
+ Step Hash = 0x66404 // step
+ Strike Hash = 0x68406 // strike
+ Strong Hash = 0x68f06 // strong
+ Style Hash = 0x69505 // style
+ Sub Hash = 0x61e03 // sub
+ Summary Hash = 0x69a07 // summary
+ Sup Hash = 0x6a103 // sup
+ Svg Hash = 0x6a403 // svg
+ System Hash = 0x6a706 // system
+ Tabindex Hash = 0x4d908 // tabindex
+ Table Hash = 0x25605 // table
+ Target Hash = 0x2f906 // target
+ Tbody Hash = 0x3f05 // tbody
+ Td Hash = 0xaa02 // td
+ Template Hash = 0x6aa08 // template
+ Text Hash = 0x1fa04 // text
+ Textarea Hash = 0x38908 // textarea
+ Tfoot Hash = 0xf005 // tfoot
+ Th Hash = 0x18f02 // th
+ Thead Hash = 0x37105 // thead
+ Time Hash = 0x2ee04 // time
+ Title Hash = 0x14a05 // title
+ Tr Hash = 0x1fd02 // tr
+ Track Hash = 0x1fd05 // track
+ Translate Hash = 0x22109 // translate
+ Truespeed Hash = 0x27309 // truespeed
+ Tt Hash = 0x9d02 // tt
+ Type Hash = 0x11204 // type
+ Typemustmatch Hash = 0x1da0d // typemustmatch
+ U Hash = 0xb01 // u
+ Ul Hash = 0x5802 // ul
+ Undeterminate Hash = 0x250d // undeterminate
+ Usemap Hash = 0x14106 // usemap
+ Valign Hash = 0x1506 // valign
+ Value Hash = 0x10d05 // value
+ Valuetype Hash = 0x10d09 // valuetype
+ Var Hash = 0x32f03 // var
+ Video Hash = 0x6b205 // video
+ Visible Hash = 0x6bd07 // visible
+ Vlink Hash = 0x6c405 // vlink
+ Wbr Hash = 0x57a03 // wbr
+ Width Hash = 0x60405 // width
+ Wrap Hash = 0x59304 // wrap
+ Xmlns Hash = 0x15f05 // xmlns
+ Xmp Hash = 0x16903 // xmp
+)
+
+// String returns the name the hash stands for. The upper bits of the value
+// are the offset into _Hash_text and the low 8 bits are the name's length.
+// A value whose range would run past the end of _Hash_text yields "".
+func (i Hash) String() string {
+	offset, length := uint32(i>>8), uint32(i&0xff)
+	if end := offset + length; end <= uint32(len(_Hash_text)) {
+		return _Hash_text[offset:end]
+	}
+	return ""
+}
+
+// ToHash returns the hash whose name is s. It returns zero if there is no
+// such hash. It is case sensitive.
+//
+// The input is mixed byte by byte (xor, then multiply by 16777619) starting
+// from _Hash_hash0. The low bits of the result and of the result shifted
+// right by 16 select two slots of _Hash_table; the first slot whose entry has
+// the same length and the same text as s wins.
+func ToHash(s []byte) Hash {
+	if len(s) == 0 || len(s) > _Hash_maxLen {
+		return 0
+	}
+	sum := uint32(_Hash_hash0)
+	for _, c := range s {
+		sum ^= uint32(c)
+		sum *= 16777619
+	}
+	mask := uint32(len(_Hash_table) - 1)
+	for _, slot := range [...]uint32{sum & mask, (sum >> 16) & mask} {
+		cand := _Hash_table[slot]
+		if int(cand&0xff) != len(s) {
+			// Length differs, so this entry cannot be s; try the next slot.
+			continue
+		}
+		// Both sides have the same length here, so string equality is the
+		// same as the byte-by-byte comparison.
+		if _Hash_text[cand>>8 : cand>>8+cand&0xff] == string(s) {
+			return cand
+		}
+	}
+	return 0
+}
+
+const _Hash_hash0 = 0x5334b67c
+const _Hash_maxLen = 16
+const _Hash_text = "abbradiogrouparamainavalignobrbackgroundeterminateaccept-cha" +
+ "rsetbodyaccesskeygenabledefaultSelectedeferowspanoembedelabe" +
+ "longdescanvasideclarequiredetailsampatternoframesetdfnohrefl" +
+ "anguageacronymalignmarkbdialogalinkindirnamediagroupictureal" +
+ "tfooterubyasyncitemidisabledivaluetypeaudioncancelooptgroupi" +
+ "ngautocompleteautofocusemappletitleautoplayaxisindexmlnsandb" +
+ "oxmplaceholderulesectionblurbdoncanplaythrough1bgcolorbgsoun" +
+ "dlbigblinkblockquotebuttonabortclassidraggablegendcodetypemu" +
+ "stmatchallengecolgrouplaintextrackcolspannotationXmlcommandc" +
+ "ompactranslatecontrolshapecoordsmallowfullscreenoresizesorta" +
+ "blecrossoriginsourcefieldsetruespeedfigcaptionafterprintfigu" +
+ "reversedforeignObjectforeignobjectformactionbeforeprintforme" +
+ "nctypeformmethodformnovalidatetimeterformtargeth6heightmlhgr" +
+ "ouposterhiddenoscripthigh2http-equivariframeborderimageimgly" +
+ "ph3ismapreloaditemscopeditemtypemarqueematheaderspacermaxlen" +
+ "gth4mtextareadonlymultiplemutedoncloseamlesspellcheckedoncon" +
+ "textmenuoncuechangeondblclickondragendondragenterondragleave" +
+ "ondragoverondragstarticlearondropzonemptiedondurationchangeo" +
+ "nendedonerroronfocusrcdocodebasefontimeupdateonhashchangeoni" +
+ "nputoninvalidonkeydownloadonkeypressrclangonkeyupublicontent" +
+ "editableonloadeddatalistingonloadedmetadatabindexonloadstart" +
+ "onmessageonmousedownoshadefaultCheckedonmousemoveonmouseoutp" +
+ "utonmouseoveronmouseuponmousewheelonofflinertononlineonpageh" +
+ "idefaultMutedonpageshowbronpauseonexitempropenowrapromptonpl" +
+ "ayingonpopstateonprogressrcsetonratechangeonresetonresizeons" +
+ "crollingonseekedonseekingonselectedonshowidth5onstalledonsto" +
+ "rageonsubmitemrefacenteronsuspendonunloadonvolumechangeonwai" +
+ "tingoptimumanifesteprofileoptionbeforeunloaddresstrikeytypes" +
+ "trongstylesummarysupsvgsystemplatevideonclickvisiblevlink"
+
+var _Hash_table = [1 << 9]Hash{
+ 0x0: 0x2cb0b, // formenctype
+ 0x1: 0x2d60a, // formmethod
+ 0x2: 0x3c80b, // oncuechange
+ 0x3: 0x3dd06, // ondrag
+ 0x6: 0x68406, // strike
+ 0x7: 0x6b205, // video
+ 0x9: 0x4a907, // content
+ 0xa: 0x4e07, // enabled
+ 0xb: 0x59106, // nowrap
+ 0xc: 0xd304, // link
+ 0xe: 0x28a02, // rp
+ 0xf: 0x2840c, // onafterprint
+ 0x10: 0x14506, // applet
+ 0x11: 0xf005, // tfoot
+ 0x12: 0x5040e, // defaultChecked
+ 0x13: 0x3330b, // frameborder
+ 0x14: 0xf106, // footer
+ 0x15: 0x5f708, // selected
+ 0x16: 0x49707, // srclang
+ 0x18: 0x52a0b, // onmouseover
+ 0x19: 0x1d604, // code
+ 0x1b: 0x47809, // oninvalid
+ 0x1c: 0x62804, // face
+ 0x1e: 0x3bd0b, // contextmenu
+ 0x1f: 0xa308, // frameset
+ 0x21: 0x5650c, // defaultMuted
+ 0x22: 0x19905, // color
+ 0x23: 0x59c06, // onplay
+ 0x25: 0x2f005, // meter
+ 0x26: 0x61309, // onstorage
+ 0x27: 0x38e08, // readonly
+ 0x29: 0x66707, // profile
+ 0x2a: 0x8607, // declare
+ 0x2b: 0xb01, // u
+ 0x2c: 0x31908, // noscript
+ 0x2d: 0x65e08, // manifest
+ 0x2e: 0x1b806, // button
+ 0x2f: 0x2ea08, // datetime
+ 0x30: 0x47305, // input
+ 0x31: 0x5407, // default
+ 0x32: 0x1d608, // codetype
+ 0x33: 0x2a90d, // foreignobject
+ 0x34: 0x36807, // marquee
+ 0x36: 0x19707, // bgcolor
+ 0x37: 0x19502, // h1
+ 0x39: 0x1e0a, // background
+ 0x3b: 0x2f50a, // formtarget
+ 0x41: 0x2f906, // target
+ 0x43: 0x23b05, // small
+ 0x44: 0x45008, // codebase
+ 0x45: 0x55005, // inert
+ 0x47: 0x38805, // mtext
+ 0x48: 0x6607, // rowspan
+ 0x49: 0x2be0d, // onbeforeprint
+ 0x4a: 0x55508, // ononline
+ 0x4c: 0x29006, // figure
+ 0x4d: 0x4cb10, // onloadedmetadata
+ 0x4e: 0xbb07, // acronym
+ 0x50: 0x39608, // multiple
+ 0x51: 0x320e, // accept-charset
+ 0x52: 0x24f05, // sizes
+ 0x53: 0x29c0d, // foreignObject
+ 0x55: 0x2e40a, // novalidate
+ 0x56: 0x55d0a, // onpagehide
+ 0x57: 0x2e302, // mn
+ 0x58: 0x38602, // h4
+ 0x5a: 0x1c102, // rt
+ 0x5b: 0xd205, // alink
+ 0x5e: 0x59606, // prompt
+ 0x5f: 0x17102, // ol
+ 0x61: 0x5d108, // onresize
+ 0x64: 0x69a07, // summary
+ 0x65: 0x5a50a, // onpopstate
+ 0x66: 0x38d04, // area
+ 0x68: 0x64f09, // onwaiting
+ 0x6b: 0xdc04, // name
+ 0x6c: 0x23606, // coords
+ 0x6d: 0x34303, // img
+ 0x6e: 0x66404, // step
+ 0x6f: 0x5ec09, // onseeking
+ 0x70: 0x32104, // high
+ 0x71: 0x49e07, // onkeyup
+ 0x72: 0x5f706, // select
+ 0x73: 0x1fd05, // track
+ 0x74: 0x34b05, // ismap
+ 0x76: 0x47107, // oninput
+ 0x77: 0x8d01, // q
+ 0x78: 0x48109, // onkeydown
+ 0x79: 0x33e05, // image
+ 0x7a: 0x2b604, // form
+ 0x7b: 0x60a09, // onstalled
+ 0x7c: 0xe707, // picture
+ 0x7d: 0x42f08, // onchange
+ 0x7e: 0x1a905, // blink
+ 0x7f: 0xee03, // alt
+ 0x80: 0xfa05, // async
+ 0x82: 0x1702, // li
+ 0x84: 0x2c02, // mi
+ 0x85: 0xff06, // itemid
+ 0x86: 0x11605, // audio
+ 0x87: 0x31b06, // script
+ 0x8b: 0x44b06, // srcdoc
+ 0x8e: 0xc704, // mark
+ 0x8f: 0x18403, // bdo
+ 0x91: 0x5120b, // onmousemove
+ 0x93: 0x3c404, // menu
+ 0x94: 0x45804, // font
+ 0x95: 0x14f08, // autoplay
+ 0x96: 0x6c405, // vlink
+ 0x98: 0x6e02, // em
+ 0x9a: 0x5b806, // srcset
+ 0x9b: 0x1ee08, // colgroup
+ 0x9c: 0x58e04, // open
+ 0x9d: 0x1d006, // legend
+ 0x9e: 0x4e10b, // onloadstart
+ 0xa2: 0x22109, // translate
+ 0xa3: 0x6e05, // embed
+ 0xa4: 0x1c305, // class
+ 0xa6: 0x6aa08, // template
+ 0xa7: 0x37206, // header
+ 0xa9: 0x4b806, // onload
+ 0xaa: 0x37105, // thead
+ 0xab: 0x5db09, // scrolling
+ 0xac: 0xc05, // param
+ 0xae: 0x9b07, // pattern
+ 0xaf: 0x9207, // details
+ 0xb1: 0x4a406, // public
+ 0xb3: 0x4f50b, // onmousedown
+ 0xb4: 0x14403, // map
+ 0xb6: 0x25b0b, // crossorigin
+ 0xb7: 0x1506, // valign
+ 0xb9: 0x1bc07, // onabort
+ 0xba: 0x66e06, // option
+ 0xbb: 0x26606, // source
+ 0xbc: 0x6205, // defer
+ 0xbd: 0x1e509, // challenge
+ 0xbf: 0x10d05, // value
+ 0xc0: 0x23d0f, // allowfullscreen
+ 0xc1: 0xca03, // kbd
+ 0xc2: 0x2070d, // annotationXml
+ 0xc3: 0x5be0c, // onratechange
+ 0xc4: 0x4f702, // mo
+ 0xc6: 0x3af0a, // spellcheck
+ 0xc7: 0x2c03, // min
+ 0xc8: 0x4b80c, // onloadeddata
+ 0xc9: 0x41205, // clear
+ 0xca: 0x42710, // ondurationchange
+ 0xcb: 0x1a04, // nobr
+ 0xcd: 0x27309, // truespeed
+ 0xcf: 0x30906, // hgroup
+ 0xd0: 0x40c05, // start
+ 0xd3: 0x41908, // dropzone
+ 0xd5: 0x7405, // label
+ 0xd8: 0xde0a, // mediagroup
+ 0xd9: 0x17e06, // onblur
+ 0xdb: 0x27f07, // caption
+ 0xdd: 0x7c04, // desc
+ 0xde: 0x15f05, // xmlns
+ 0xdf: 0x30106, // height
+ 0xe0: 0x21407, // command
+ 0xe2: 0x57f0b, // pauseonexit
+ 0xe3: 0x68f06, // strong
+ 0xe4: 0x43e07, // onerror
+ 0xe5: 0x61c08, // onsubmit
+ 0xe6: 0xb308, // language
+ 0xe7: 0x48608, // download
+ 0xe9: 0x53509, // onmouseup
+ 0xec: 0x2cf07, // enctype
+ 0xed: 0x5f508, // onselect
+ 0xee: 0x2b006, // object
+ 0xef: 0x1f509, // plaintext
+ 0xf0: 0x3d30a, // ondblclick
+ 0xf1: 0x18610, // oncanplaythrough
+ 0xf2: 0xd903, // dir
+ 0xf3: 0x38908, // textarea
+ 0xf4: 0x12a04, // ping
+ 0xf5: 0x2da06, // method
+ 0xf6: 0x22a08, // controls
+ 0xf7: 0x37806, // spacer
+ 0xf8: 0x6a403, // svg
+ 0xf9: 0x30504, // html
+ 0xfa: 0x3d01, // s
+ 0xfc: 0xcc06, // dialog
+ 0xfe: 0x1da0d, // typemustmatch
+ 0xff: 0x3b407, // checked
+ 0x101: 0x30e06, // poster
+ 0x102: 0x3260a, // http-equiv
+ 0x103: 0x44b03, // src
+ 0x104: 0x10408, // disabled
+ 0x105: 0x37207, // headers
+ 0x106: 0x5af0a, // onprogress
+ 0x107: 0x26c08, // fieldset
+ 0x108: 0x32f03, // var
+ 0x10a: 0xa305, // frame
+ 0x10b: 0x36008, // itemtype
+ 0x10c: 0x3fc0a, // ondragover
+ 0x10d: 0x13a09, // autofocus
+ 0x10f: 0x601, // i
+ 0x110: 0x35902, // ms
+ 0x111: 0x45404, // base
+ 0x113: 0x35a05, // scope
+ 0x114: 0x3206, // accept
+ 0x115: 0x58808, // itemprop
+ 0x117: 0xfe04, // cite
+ 0x118: 0x3907, // charset
+ 0x119: 0x14a05, // title
+ 0x11a: 0x68807, // keytype
+ 0x11b: 0x1fa04, // text
+ 0x11c: 0x65807, // optimum
+ 0x11e: 0x37204, // head
+ 0x121: 0x21b07, // compact
+ 0x123: 0x63009, // onsuspend
+ 0x124: 0x4c404, // list
+ 0x125: 0x4590c, // ontimeupdate
+ 0x126: 0x62a06, // center
+ 0x127: 0x31406, // hidden
+ 0x129: 0x35609, // itemscope
+ 0x12c: 0x1a402, // dl
+ 0x12d: 0x17907, // section
+ 0x12e: 0x11a08, // oncancel
+ 0x12f: 0x6b607, // onclick
+ 0x130: 0xde05, // media
+ 0x131: 0x52406, // output
+ 0x132: 0x4c008, // datalist
+ 0x133: 0x53e0c, // onmousewheel
+ 0x134: 0x45408, // basefont
+ 0x135: 0x37e09, // maxlength
+ 0x136: 0x6bd07, // visible
+ 0x137: 0x2e00e, // formnovalidate
+ 0x139: 0x16903, // xmp
+ 0x13a: 0x101, // b
+ 0x13b: 0x5710a, // onpageshow
+ 0x13c: 0xf604, // ruby
+ 0x13d: 0x16b0b, // placeholder
+ 0x13e: 0x4c407, // listing
+ 0x140: 0x26403, // ins
+ 0x141: 0x62207, // itemref
+ 0x144: 0x540f, // defaultSelected
+ 0x146: 0x3f10b, // ondragleave
+ 0x147: 0x1ae0a, // blockquote
+ 0x148: 0x59304, // wrap
+ 0x14a: 0x1a603, // big
+ 0x14b: 0x35003, // rel
+ 0x14c: 0x41706, // ondrop
+ 0x14e: 0x6a706, // system
+ 0x14f: 0x30a, // radiogroup
+ 0x150: 0x25605, // table
+ 0x152: 0x57a03, // wbr
+ 0x153: 0x3bb0d, // oncontextmenu
+ 0x155: 0x250d, // undeterminate
+ 0x157: 0x20204, // cols
+ 0x158: 0x16307, // sandbox
+ 0x159: 0x1303, // nav
+ 0x15a: 0x37e03, // max
+ 0x15b: 0x7808, // longdesc
+ 0x15c: 0x60405, // width
+ 0x15d: 0x34902, // h3
+ 0x15e: 0x19e07, // bgsound
+ 0x161: 0x10d09, // valuetype
+ 0x162: 0x69505, // style
+ 0x164: 0x3f05, // tbody
+ 0x165: 0x40e07, // article
+ 0x169: 0xcb03, // bdi
+ 0x16a: 0x67e07, // address
+ 0x16b: 0x23105, // shape
+ 0x16c: 0x2ba06, // action
+ 0x16e: 0x1fd02, // tr
+ 0x16f: 0xaa02, // td
+ 0x170: 0x3dd09, // ondragend
+ 0x171: 0x5802, // ul
+ 0x172: 0x33806, // border
+ 0x174: 0x4a06, // keygen
+ 0x175: 0x4004, // body
+ 0x177: 0x1c909, // draggable
+ 0x178: 0x2b60a, // formaction
+ 0x17b: 0x34406, // mglyph
+ 0x17d: 0x1d02, // rb
+ 0x17e: 0x2ff02, // h6
+ 0x17f: 0x41e09, // onemptied
+ 0x180: 0x5ca07, // onreset
+ 0x181: 0x1004, // main
+ 0x182: 0x12104, // loop
+ 0x183: 0x48e0a, // onkeypress
+ 0x184: 0x9d02, // tt
+ 0x186: 0x20207, // colspan
+ 0x188: 0x36f04, // math
+ 0x189: 0x1605, // align
+ 0x18a: 0xa108, // noframes
+ 0x18b: 0xaf02, // hr
+ 0x18c: 0xc10a, // malignmark
+ 0x18e: 0x23f03, // low
+ 0x18f: 0x8502, // id
+ 0x190: 0x6604, // rows
+ 0x191: 0x29403, // rev
+ 0x192: 0x63908, // onunload
+ 0x193: 0x39e05, // muted
+ 0x194: 0x35a06, // scoped
+ 0x195: 0x31602, // dd
+ 0x196: 0x60602, // dt
+ 0x197: 0x6720e, // onbeforeunload
+ 0x199: 0x2070a, // annotation
+ 0x19a: 0x29408, // reversed
+ 0x19c: 0x11204, // type
+ 0x19d: 0x57d07, // onpause
+ 0x19e: 0xd604, // kind
+ 0x19f: 0x4c004, // data
+ 0x1a0: 0x4ff07, // noshade
+ 0x1a3: 0x17505, // rules
+ 0x1a4: 0x12308, // optgroup
+ 0x1a5: 0x202, // br
+ 0x1a7: 0x1, // a
+ 0x1a8: 0x51d0a, // onmouseout
+ 0x1aa: 0x54a09, // onoffline
+ 0x1ab: 0x6410e, // onvolumechange
+ 0x1ae: 0x61e03, // sub
+ 0x1b3: 0x29c03, // for
+ 0x1b5: 0x8b08, // required
+ 0x1b6: 0x5b108, // progress
+ 0x1b7: 0x14106, // usemap
+ 0x1b8: 0x7f06, // canvas
+ 0x1b9: 0x4a804, // icon
+ 0x1bb: 0x1c103, // rtc
+ 0x1bc: 0x8305, // aside
+ 0x1bd: 0x2ee04, // time
+ 0x1be: 0x4060b, // ondragstart
+ 0x1c0: 0x27c0a, // figcaption
+ 0x1c1: 0xaf04, // href
+ 0x1c2: 0x33206, // iframe
+ 0x1c3: 0x18609, // oncanplay
+ 0x1c4: 0x6904, // span
+ 0x1c5: 0x34f03, // pre
+ 0x1c6: 0x6c07, // noembed
+ 0x1c8: 0x5e408, // onseeked
+ 0x1c9: 0x4d304, // meta
+ 0x1ca: 0x32402, // h2
+ 0x1cb: 0x3a808, // seamless
+ 0x1cc: 0xab03, // dfn
+ 0x1cd: 0x15704, // axis
+ 0x1cf: 0x3e60b, // ondragenter
+ 0x1d0: 0x18f02, // th
+ 0x1d1: 0x4650c, // onhashchange
+ 0x1d2: 0xb304, // lang
+ 0x1d3: 0x44507, // onfocus
+ 0x1d5: 0x24f04, // size
+ 0x1d8: 0x12e0c, // autocomplete
+ 0x1d9: 0xaf08, // hreflang
+ 0x1da: 0x9804, // samp
+ 0x1de: 0x19903, // col
+ 0x1df: 0x10b03, // div
+ 0x1e0: 0x25308, // sortable
+ 0x1e1: 0x7203, // del
+ 0x1e3: 0x3a307, // onclose
+ 0x1e6: 0xd907, // dirname
+ 0x1e8: 0x1c307, // classid
+ 0x1e9: 0x34f07, // preload
+ 0x1ea: 0x4d908, // tabindex
+ 0x1eb: 0x60802, // h5
+ 0x1ec: 0x5d908, // onscroll
+ 0x1ed: 0x4a90f, // contenteditable
+ 0x1ee: 0x4ec09, // onmessage
+ 0x1ef: 0x4, // abbr
+ 0x1f0: 0x15907, // isindex
+ 0x1f1: 0x6a103, // sup
+ 0x1f3: 0x24b08, // noresize
+ 0x1f5: 0x59c09, // onplaying
+ 0x1f6: 0x4409, // accesskey
+ 0x1fa: 0xc01, // p
+ 0x1fb: 0x43707, // onended
+ 0x1fc: 0x5ff06, // onshow
+ 0x1fe: 0xad06, // nohref
+}
diff --git a/vendor/github.com/tdewolff/parse/html/hash_test.go b/vendor/github.com/tdewolff/parse/html/hash_test.go
new file mode 100644
index 0000000..c905ba3
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/html/hash_test.go
@@ -0,0 +1,58 @@
+package html // import "github.com/tdewolff/parse/html"
+
+import (
+ "bytes"
+ "testing"
+
+ "github.com/tdewolff/test"
+)
+
+// TestHashTable checks the round trip between names and Hash values: known
+// names resolve to their constants, constants print their names, and unknown
+// or empty input resolves to the zero Hash.
+func TestHashTable(t *testing.T) {
+ // Name -> hash and hash -> name for entries that exist in the table.
+ test.T(t, ToHash([]byte("address")), Address, "'address' must resolve to Address")
+ test.T(t, Address.String(), "address")
+ test.T(t, Accept_Charset.String(), "accept-charset")
+ // ToHash rejects empty input before hashing.
+ test.T(t, ToHash([]byte("")), Hash(0), "empty string must resolve to zero")
+ // An offset/length pair that lies outside _Hash_text prints as "".
+ test.T(t, Hash(0xffffff).String(), "")
+ // Names that are not in the predefined list.
+ test.T(t, ToHash([]byte("iter")), Hash(0), "'iter' must resolve to zero")
+ test.T(t, ToHash([]byte("test")), Hash(0), "'test' must resolve to zero")
+}
+
+////////////////////////////////////////////////////////////////
+
+// result receives the final counter of each benchmark below; presumably it
+// exists so the compiler cannot discard the benchmarked work as unused.
+var result int
+
+// naive scenario
+func BenchmarkCompareBytes(b *testing.B) {
+ var r int
+ val := []byte("span")
+ for n := 0; n < b.N; n++ {
+ if bytes.Equal(val, []byte("span")) {
+ r++
+ }
+ }
+ result = r
+}
+
+// using-atoms scenario
+func BenchmarkFindAndCompareAtom(b *testing.B) {
+ var r int
+ val := []byte("span")
+ for n := 0; n < b.N; n++ {
+ if ToHash(val) == Span {
+ r++
+ }
+ }
+ result = r
+}
+
+// using-atoms worst-case scenario
+func BenchmarkFindAtomCompareBytes(b *testing.B) {
+ var r int
+ val := []byte("zzzz")
+ for n := 0; n < b.N; n++ {
+ if h := ToHash(val); h == 0 && bytes.Equal(val, []byte("zzzz")) {
+ r++
+ }
+ }
+ result = r
+}
diff --git a/vendor/github.com/tdewolff/parse/html/lex.go b/vendor/github.com/tdewolff/parse/html/lex.go
new file mode 100644
index 0000000..c81490a
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/html/lex.go
@@ -0,0 +1,485 @@
+// Package html is an HTML5 lexer following the specifications at http://www.w3.org/TR/html5/syntax.html.
+package html // import "github.com/tdewolff/parse/html"
+
+import (
+ "io"
+ "strconv"
+
+ "github.com/tdewolff/parse"
+ "github.com/tdewolff/parse/buffer"
+)
+
+// TokenType determines the type of token, e.g. a start tag, an attribute or text.
+type TokenType uint32
+
+// TokenType values.
+const (
+ ErrorToken TokenType = iota // extra token when errors occur
+ CommentToken
+ DoctypeToken
+ StartTagToken
+ StartTagCloseToken // the ">" that ends a start tag
+ StartTagVoidToken // the "/>" that ends a start tag
+ EndTagToken
+ AttributeToken
+ TextToken
+ SvgToken // a whole <svg> element, returned as one token
+ MathToken // a whole <math> element, returned as one token
+)
+
+// String returns the string representation of a TokenType; values without a name are rendered as "Invalid(n)".
+func (tt TokenType) String() string {
+ switch tt {
+ case ErrorToken:
+ return "Error"
+ case CommentToken:
+ return "Comment"
+ case DoctypeToken:
+ return "Doctype"
+ case StartTagToken:
+ return "StartTag"
+ case StartTagCloseToken:
+ return "StartTagClose"
+ case StartTagVoidToken:
+ return "StartTagVoid"
+ case EndTagToken:
+ return "EndTag"
+ case AttributeToken:
+ return "Attribute"
+ case TextToken:
+ return "Text"
+ case SvgToken:
+ return "Svg"
+ case MathToken:
+ return "Math"
+ }
+ return "Invalid(" + strconv.Itoa(int(tt)) + ")"
+}
+
+////////////////////////////////////////////////////////////////
+
+// Lexer is the state for the lexer.
+type Lexer struct {
+ r *buffer.Lexer // underlying buffered input; all Peek/Move/Shift calls go through it
+ err error // error raised by this lexer itself; Err reports it when r has no error
+
+ rawTag Hash // hash of the raw-text element just opened (e.g. Script, Title), or 0 when none is pending
+ inTag bool // true while inside a start tag, i.e. Next yields attributes or the tag close
+
+ text []byte // value handed out by Text for the most recent token
+ attrVal []byte // value handed out by AttrVal for the most recent attribute
+}
+
+// NewLexer returns a new Lexer for a given io.Reader, wrapping it in a buffer.Lexer.
+func NewLexer(r io.Reader) *Lexer {
+ return &Lexer{
+ r: buffer.NewLexer(r),
+ }
+}
+
+// Err returns the error encountered during lexing: the underlying buffer's error if it has one (often io.EOF), otherwise the lexer's own error.
+func (l *Lexer) Err() error {
+ if err := l.r.Err(); err != nil {
+ return err
+ }
+ return l.err
+}
+
+// Restore restores the NULL byte at the end of the buffer by delegating to the underlying buffer lexer's Restore.
+func (l *Lexer) Restore() {
+ l.r.Restore()
+}
+
+// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message.
+func (l *Lexer) Next() (TokenType, []byte) {
+ l.text = nil
+ var c byte
+ if l.inTag { // inside a start tag: yield attributes until the tag is closed
+ l.attrVal = nil
+ for { // before attribute name state
+ if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
+ l.r.Move(1)
+ continue
+ }
+ break
+ }
+ if c == 0 {
+ l.err = parse.NewErrorLexer("unexpected null character", l.r)
+ return ErrorToken, nil
+ } else if c != '>' && (c != '/' || l.r.Peek(1) != '>') { // anything other than ">" or "/>" starts an attribute
+ return AttributeToken, l.shiftAttribute()
+ }
+ start := l.r.Pos() // skipped whitespace stays in the returned data but is excluded from Text()
+ l.inTag = false
+ if c == '/' {
+ l.r.Move(2)
+ l.text = l.r.Lexeme()[start:]
+ return StartTagVoidToken, l.r.Shift()
+ }
+ l.r.Move(1)
+ l.text = l.r.Lexeme()[start:]
+ return StartTagCloseToken, l.r.Shift()
+ }
+
+ if l.rawTag != 0 { // a raw-text element was just opened: its content is returned as one text token
+ if rawText := l.shiftRawText(); len(rawText) > 0 {
+ l.rawTag = 0
+ return TextToken, rawText
+ }
+ l.rawTag = 0 // empty content: fall through to normal lexing
+ }
+
+ for {
+ c = l.r.Peek(0)
+ if c == '<' {
+ c = l.r.Peek(1)
+ if l.r.Pos() > 0 { // text is pending: return it first if this '<' begins markup
+ if c == '/' && l.r.Peek(2) != 0 || 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '!' || c == '?' {
+ return TextToken, l.r.Shift()
+ }
+ } else if c == '/' && l.r.Peek(2) != 0 {
+ l.r.Move(2)
+ if c = l.r.Peek(0); c != '>' && !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') { // "</" not followed by a letter or '>' is a bogus comment
+ return CommentToken, l.shiftBogusComment()
+ }
+ return EndTagToken, l.shiftEndTag()
+ } else if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
+ l.r.Move(1)
+ l.inTag = true
+ return l.shiftStartTag()
+ } else if c == '!' {
+ l.r.Move(2)
+ return l.readMarkup()
+ } else if c == '?' {
+ l.r.Move(1)
+ return CommentToken, l.shiftBogusComment()
+ }
+ } else if c == 0 {
+ if l.r.Pos() > 0 { // flush pending text before reporting the error/EOF on the next call
+ return TextToken, l.r.Shift()
+ }
+ l.err = parse.NewErrorLexer("unexpected null character", l.r)
+ return ErrorToken, nil
+ }
+ l.r.Move(1) // ordinary text byte, or a '<' that does not begin markup
+ }
+}
+
+// Text returns the textual representation of a token. This excludes delimiters and additional leading/trailing characters. It is reset to nil at the start of every Next call.
+func (l *Lexer) Text() []byte {
+ return l.text
+}
+
+// AttrVal returns the attribute value when an AttributeToken was returned from Next, including any surrounding quotes; it is nil when the attribute has no "=".
+func (l *Lexer) AttrVal() []byte {
+ return l.attrVal
+}
+
+////////////////////////////////////////////////////////////////
+
+// The following functions follow the specifications at http://www.w3.org/html/wg/drafts/html/master/syntax.html
+
+// shiftRawText consumes the content of the raw-text element recorded in
+// l.rawTag and returns it, leaving the element's end tag unread.
+//
+// For Plaintext everything up to the end of the input is returned. For the
+// other raw elements the content ends at the first "</name" whose name hashes
+// to l.rawTag. For Script, a "<!--" ... "-->" section is scanned separately:
+// inside it a "</script" only ends the content when no nested "<script" is
+// open. If the input ends first, everything read so far is returned.
+func (l *Lexer) shiftRawText() []byte {
+ if l.rawTag == Plaintext {
+ for {
+ if l.r.Peek(0) == 0 {
+ return l.r.Shift()
+ }
+ l.r.Move(1)
+ }
+ } else { // RCDATA, RAWTEXT and SCRIPT
+ for {
+ c := l.r.Peek(0)
+ if c == '<' {
+ if l.r.Peek(1) == '/' {
+ mark := l.r.Pos()
+ l.r.Move(2)
+ for {
+ if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
+ break
+ }
+ l.r.Move(1)
+ }
+ if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == l.rawTag { // copy so that ToLower doesn't change the case of the underlying slice
+ l.r.Rewind(mark) // leave the end tag for the next call to Next
+ return l.r.Shift()
+ }
+ } else if l.rawTag == Script && l.r.Peek(1) == '!' && l.r.Peek(2) == '-' && l.r.Peek(3) == '-' {
+ l.r.Move(4)
+ inScript := false // true while a nested "<script" inside the comment is open
+ for {
+ c := l.r.Peek(0)
+ if c == '-' && l.r.Peek(1) == '-' && l.r.Peek(2) == '>' {
+ l.r.Move(3)
+ break
+ } else if c == '<' {
+ isEnd := l.r.Peek(1) == '/'
+ if isEnd {
+ l.r.Move(2)
+ } else {
+ l.r.Move(1)
+ }
+ mark := l.r.Pos()
+ for {
+ if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
+ break
+ }
+ l.r.Move(1)
+ }
+ if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark:]))); h == Script { // copy so that ToLower doesn't change the case of the underlying slice
+ if !isEnd {
+ inScript = true
+ } else {
+ if !inScript {
+ l.r.Rewind(mark - 2) // back to the "</" of the closing tag
+ return l.r.Shift()
+ }
+ inScript = false
+ }
+ }
+ // Do not advance here: the byte after the tag name must be
+ // examined by the next iteration, since it may be the first
+ // '-' of "-->", another '<', or the terminating NUL.
+ } else if c == 0 {
+ return l.r.Shift()
+ } else {
+ l.r.Move(1)
+ }
+ }
+ } else {
+ l.r.Move(1)
+ }
+ } else if c == 0 {
+ return l.r.Shift()
+ } else {
+ l.r.Move(1)
+ }
+ }
+ }
+}
+
+func (l *Lexer) readMarkup() (TokenType, []byte) { // called after "<!" was consumed: lexes a comment, a CDATA section, a doctype or a bogus comment
+ if l.at('-', '-') {
+ l.r.Move(2)
+ for {
+ if l.r.Peek(0) == 0 { // unterminated comment: return what was read; Text() stays nil
+ return CommentToken, l.r.Shift()
+ } else if l.at('-', '-', '>') {
+ l.text = l.r.Lexeme()[4:] // drop the leading "<!--"
+ l.r.Move(3)
+ return CommentToken, l.r.Shift()
+ } else if l.at('-', '-', '!', '>') {
+ l.text = l.r.Lexeme()[4:]
+ l.r.Move(4)
+ return CommentToken, l.r.Shift()
+ }
+ l.r.Move(1)
+ }
+ } else if l.at('[', 'C', 'D', 'A', 'T', 'A', '[') { // CDATA sections are returned as text tokens, delimiters included
+ l.r.Move(7)
+ for {
+ if l.r.Peek(0) == 0 {
+ return TextToken, l.r.Shift()
+ } else if l.at(']', ']', '>') {
+ l.r.Move(3)
+ return TextToken, l.r.Shift()
+ }
+ l.r.Move(1)
+ }
+ } else {
+ if l.atCaseInsensitive('d', 'o', 'c', 't', 'y', 'p', 'e') {
+ l.r.Move(7)
+ if l.r.Peek(0) == ' ' {
+ l.r.Move(1)
+ }
+ for {
+ if c := l.r.Peek(0); c == '>' || c == 0 {
+ l.text = l.r.Lexeme()[9:] // drop the 9 bytes of "<!doctype"; a following space remains part of the text
+ if c == '>' {
+ l.r.Move(1)
+ }
+ return DoctypeToken, l.r.Shift()
+ }
+ l.r.Move(1)
+ }
+ }
+ }
+ return CommentToken, l.shiftBogusComment() // any other "<!..." is a bogus comment
+}
+
+func (l *Lexer) shiftBogusComment() []byte { // consumes up to and including the next '>' (or to the end of input) and returns the whole lexeme
+ for {
+ c := l.r.Peek(0)
+ if c == '>' {
+ l.text = l.r.Lexeme()[2:] // drop the two-byte opener ("<?", "<!" or "</")
+ l.r.Move(1)
+ return l.r.Shift()
+ } else if c == 0 {
+ l.text = l.r.Lexeme()[2:]
+ return l.r.Shift()
+ }
+ l.r.Move(1)
+ }
+}
+
+func (l *Lexer) shiftStartTag() (TokenType, []byte) { // called with '<' consumed: reads the tag name, lowercases it into l.text and picks the token type
+ for {
+ if c := l.r.Peek(0); c == ' ' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 {
+ break
+ }
+ l.r.Move(1)
+ }
+ l.text = parse.ToLower(l.r.Lexeme()[1:]) // name without the leading '<'
+ if h := ToHash(l.text); h == Textarea || h == Title || h == Style || h == Xmp || h == Iframe || h == Script || h == Plaintext || h == Svg || h == Math {
+ if h == Svg { // svg and math are consumed whole, so no attribute tokens follow
+ l.inTag = false
+ return SvgToken, l.shiftXml(h)
+ } else if h == Math {
+ l.inTag = false
+ return MathToken, l.shiftXml(h)
+ }
+ l.rawTag = h // remember the raw-text element so Next lexes its content via shiftRawText
+ }
+ return StartTagToken, l.r.Shift()
+}
+
+func (l *Lexer) shiftAttribute() []byte { // reads one attribute; sets l.text to the lowercased name and l.attrVal to the raw value (nil if there is none)
+ nameStart := l.r.Pos()
+ var c byte
+ for { // attribute name state
+ if c = l.r.Peek(0); c == ' ' || c == '=' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 {
+ break
+ }
+ l.r.Move(1)
+ }
+ nameEnd := l.r.Pos()
+ for { // after attribute name state
+ if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
+ l.r.Move(1)
+ continue
+ }
+ break
+ }
+ if c == '=' {
+ l.r.Move(1)
+ for { // before attribute value state
+ if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
+ l.r.Move(1)
+ continue
+ }
+ break
+ }
+ attrPos := l.r.Pos()
+ delim := c
+ if delim == '"' || delim == '\'' { // attribute value single- and double-quoted state
+ l.r.Move(1)
+ for {
+ c := l.r.Peek(0)
+ if c == delim {
+ l.r.Move(1)
+ break
+ } else if c == 0 { // unterminated quoted value: keep what was read
+ break
+ }
+ l.r.Move(1)
+ }
+ } else { // attribute value unquoted state
+ for {
+ if c := l.r.Peek(0); c == ' ' || c == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 {
+ break
+ }
+ l.r.Move(1)
+ }
+ }
+ l.attrVal = l.r.Lexeme()[attrPos:] // value keeps its quotes, if any
+ } else {
+ l.r.Rewind(nameEnd) // no value: give back the whitespace skipped after the name
+ l.attrVal = nil
+ }
+ l.text = parse.ToLower(l.r.Lexeme()[nameStart:nameEnd])
+ return l.r.Shift()
+}
+
+func (l *Lexer) shiftEndTag() []byte { // called with "</" consumed: reads through '>' (or to end of input); l.text becomes the name with trailing whitespace trimmed
+ for {
+ c := l.r.Peek(0)
+ if c == '>' {
+ l.text = l.r.Lexeme()[2:] // drop the leading "</"; the '>' is not part of the text
+ l.r.Move(1)
+ break
+ } else if c == 0 {
+ l.text = l.r.Lexeme()[2:]
+ break
+ }
+ l.r.Move(1)
+ }
+
+ end := len(l.text)
+ for end > 0 {
+ if c := l.text[end-1]; c == ' ' || c == '\t' || c == '\n' || c == '\r' { // NOTE(review): '\f' is not trimmed here although the other whitespace checks in this file include it; confirm whether that is intended
+ end--
+ continue
+ }
+ break
+ }
+ l.text = l.text[:end]
+ return parse.ToLower(l.r.Shift())
+}
+
+// shiftXml consumes an embedded <svg> or <math> element up to and including
+// its end tag and returns it as a single lexeme. rawTag is the hash of the
+// element name whose end tag ("</name") terminates the scan. A '<' between
+// double quotes is not treated as the start of an end tag. If the input ends
+// first, everything read so far is returned.
+func (l *Lexer) shiftXml(rawTag Hash) []byte {
+ inQuote := false
+ for {
+ c := l.r.Peek(0)
+ if c == '"' {
+ inQuote = !inQuote
+ l.r.Move(1)
+ } else if c == '<' && !inQuote && l.r.Peek(1) == '/' {
+ mark := l.r.Pos()
+ l.r.Move(2)
+ for {
+ if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
+ break
+ }
+ l.r.Move(1)
+ }
+ if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == rawTag { // copy so that ToLower doesn't change the case of the underlying slice
+ break
+ }
+ // Not the end tag we are looking for: loop again without advancing
+ // so the byte after the name is examined as well.
+ } else if c == 0 {
+ return l.r.Shift()
+ } else {
+ // Advance exactly one byte per iteration. Advancing a second time
+ // would skip the byte after a quote or '<' unexamined, e.g. the
+ // closing quote of a="" or the terminating NUL.
+ l.r.Move(1)
+ }
+ }
+
+ for { // consume the remainder of the end tag, up to and including '>'
+ c := l.r.Peek(0)
+ if c == '>' {
+ l.r.Move(1)
+ break
+ } else if c == 0 {
+ break
+ }
+ l.r.Move(1)
+ }
+ return l.r.Shift()
+}
+
+////////////////////////////////////////////////////////////////
+
+func (l *Lexer) at(b ...byte) bool { // reports whether the upcoming input bytes equal b exactly; nothing is consumed
+ for i, c := range b {
+ if l.r.Peek(i) != c {
+ return false
+ }
+ }
+ return true
+}
+
+func (l *Lexer) atCaseInsensitive(b ...byte) bool { // like at, but an input byte also matches when it is 'a'-'A' below the byte in b; b is expected to hold lowercase letters
+ for i, c := range b {
+ if l.r.Peek(i) != c && (l.r.Peek(i)+('a'-'A')) != c { // NOTE(review): the offset is applied to any byte, so a non-letter in b would also match the byte 0x20 below it
+ return false
+ }
+ }
+ return true
+}
diff --git a/vendor/github.com/tdewolff/parse/html/lex_test.go b/vendor/github.com/tdewolff/parse/html/lex_test.go
new file mode 100644
index 0000000..5f4ca0b
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/html/lex_test.go
@@ -0,0 +1,262 @@
+package html // import "github.com/tdewolff/parse/html"
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "testing"
+
+ "github.com/tdewolff/parse"
+ "github.com/tdewolff/test"
+)
+
+type TTs []TokenType // shorthand for writing the expected token sequences in the test tables
+
+func TestTokens(t *testing.T) { // lexes each input to the end and checks the exact sequence of token types
+ var tokenTests = []struct {
+ html string
+ expected []TokenType
+ }{
+ {"<html></html>", TTs{StartTagToken, StartTagCloseToken, EndTagToken}},
+ {"<img/>", TTs{StartTagToken, StartTagVoidToken}},
+ {"<!-- comment -->", TTs{CommentToken}},
+ {"<!-- comment --!>", TTs{CommentToken}},
+ {"<p>text</p>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
+ {"<input type='button'/>", TTs{StartTagToken, AttributeToken, StartTagVoidToken}},
+ {"<input type='button' value=''/>", TTs{StartTagToken, AttributeToken, AttributeToken, StartTagVoidToken}},
+ {"<input type='=/>' \r\n\t\f value=\"'\" name=x checked />", TTs{StartTagToken, AttributeToken, AttributeToken, AttributeToken, AttributeToken, StartTagVoidToken}},
+ {"<!doctype>", TTs{DoctypeToken}},
+ {"<!doctype html>", TTs{DoctypeToken}},
+ {"<?bogus>", TTs{CommentToken}},
+ {"</0bogus>", TTs{CommentToken}},
+ {"<!bogus>", TTs{CommentToken}},
+ {"< ", TTs{TextToken}},
+ {"</", TTs{TextToken}},
+
+ // raw tags
+ {"<title><p></p></title>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
+ {"<TITLE><p></p></TITLE>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
+ {"<plaintext></plaintext>", TTs{StartTagToken, StartTagCloseToken, TextToken}},
+ {"<script></script>", TTs{StartTagToken, StartTagCloseToken, EndTagToken}},
+ {"<script>var x='</script>';</script>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken, TextToken, EndTagToken}},
+ {"<script><!--var x='</script>';--></script>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken, TextToken, EndTagToken}},
+ {"<script><!--var x='<script></script>';--></script>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
+ {"<script><!--var x='<script>';--></script>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
+ {"<![CDATA[ test ]]>", TTs{TextToken}},
+ {"<svg>text</svg>", TTs{SvgToken}},
+ {"<math>text</math>", TTs{MathToken}},
+ {`<svg>text<x a="</svg>"></x></svg>`, TTs{SvgToken}},
+ {"<a><svg>text</svg></a>", TTs{StartTagToken, StartTagCloseToken, SvgToken, EndTagToken}},
+
+ // early endings
+ {"<!-- comment", TTs{CommentToken}},
+ {"<? bogus comment", TTs{CommentToken}},
+ {"<foo", TTs{StartTagToken}},
+ {"</foo", TTs{EndTagToken}},
+ {"<foo x", TTs{StartTagToken, AttributeToken}},
+ {"<foo x=", TTs{StartTagToken, AttributeToken}},
+ {"<foo x='", TTs{StartTagToken, AttributeToken}},
+ {"<foo x=''", TTs{StartTagToken, AttributeToken}},
+ {"<!DOCTYPE note SYSTEM", TTs{DoctypeToken}},
+ {"<![CDATA[ test", TTs{TextToken}},
+ {"<script>", TTs{StartTagToken, StartTagCloseToken}},
+ {"<script><!--", TTs{StartTagToken, StartTagCloseToken, TextToken}},
+ {"<script><!--var x='<script></script>';-->", TTs{StartTagToken, StartTagCloseToken, TextToken}},
+
+ // go-fuzz
+ {"</>", TTs{EndTagToken}},
+ }
+ for _, tt := range tokenTests {
+ t.Run(tt.html, func(t *testing.T) {
+ l := NewLexer(bytes.NewBufferString(tt.html))
+ i := 0
+ for {
+ token, _ := l.Next()
+ if token == ErrorToken { // the only acceptable error is EOF, and only after every expected token was seen
+ test.T(t, l.Err(), io.EOF)
+ test.T(t, i, len(tt.expected), "when error occurred we must be at the end")
+ break
+ }
+ test.That(t, i < len(tt.expected), "index", i, "must not exceed expected token types size", len(tt.expected))
+ if i < len(tt.expected) {
+ test.T(t, token, tt.expected[i], "token types must match")
+ }
+ i++
+ }
+ })
+ }
+
+ test.T(t, TokenType(100).String(), "Invalid(100)")
+}
+
+func TestTags(t *testing.T) { // checks Text() of the first start tag, end tag or doctype token in each input
+ var tagTests = []struct {
+ html string
+ expected string
+ }{
+ {"<foo:bar.qux-norf/>", "foo:bar.qux-norf"},
+ {"<foo?bar/qux>", "foo?bar/qux"},
+ {"<!DOCTYPE note SYSTEM \"Note.dtd\">", " note SYSTEM \"Note.dtd\""},
+ {"</foo >", "foo"},
+
+ // early endings
+ {"<foo ", "foo"},
+ }
+ for _, tt := range tagTests {
+ t.Run(tt.html, func(t *testing.T) {
+ l := NewLexer(bytes.NewBufferString(tt.html))
+ for {
+ token, _ := l.Next()
+ if token == ErrorToken { // reaching the end without a tag token is always a failure
+ test.T(t, l.Err(), io.EOF)
+ test.Fail(t, "when error occurred we must be at the end")
+ break
+ } else if token == StartTagToken || token == EndTagToken || token == DoctypeToken {
+ test.String(t, string(l.Text()), tt.expected)
+ break
+ }
+ }
+ })
+ }
+}
+
+// TestAttributes lexes each input and checks the name (Text) and raw value
+// (AttrVal) of every attribute token. expected holds alternating name/value
+// pairs in the order the attributes appear.
+func TestAttributes(t *testing.T) {
+ var attributeTests = []struct {
+ attr string
+ expected []string
+ }{
+ {"<foo a=\"b\" />", []string{"a", "\"b\""}},
+ {"<foo \nchecked \r\n value\r=\t'=/>\"' />", []string{"checked", "", "value", "'=/>\"'"}},
+ {"<foo bar=\" a \n\t\r b \" />", []string{"bar", "\" a \n\t\r b \""}},
+ {"<foo a/>", []string{"a", ""}},
+ {"<foo /=/>", []string{"/", "/"}},
+
+ // early endings
+ {"<foo x", []string{"x", ""}},
+ {"<foo x=", []string{"x", ""}},
+ {"<foo x='", []string{"x", "'"}},
+ }
+ for _, tt := range attributeTests {
+ t.Run(tt.attr, func(t *testing.T) {
+ l := NewLexer(bytes.NewBufferString(tt.attr))
+ i := 0
+ for {
+ token, _ := l.Next()
+ if token == ErrorToken {
+ test.T(t, l.Err(), io.EOF)
+ test.T(t, i, len(tt.expected), "when error occurred we must be at the end")
+ break
+ } else if token == AttributeToken {
+ test.That(t, i+1 < len(tt.expected), "index", i+1, "must not exceed expected attributes size", len(tt.expected))
+ if i+1 < len(tt.expected) {
+ test.String(t, string(l.Text()), tt.expected[i], "attribute keys must match")
+ // This comparison is on the value, so the message must say so.
+ test.String(t, string(l.AttrVal()), tt.expected[i+1], "attribute values must match")
+ i += 2
+ }
+ }
+ }
+ })
+ }
+}
+
+func TestErrors(t *testing.T) { // checks that a NUL byte inside the input is reported as a *parse.Error at the expected column
+ var errorTests = []struct {
+ html string
+ col int
+ }{
+ {"a\x00b", 2},
+ }
+ for _, tt := range errorTests {
+ t.Run(tt.html, func(t *testing.T) {
+ l := NewLexer(bytes.NewBufferString(tt.html))
+ for {
+ token, _ := l.Next()
+ if token == ErrorToken {
+ if tt.col == 0 { // col 0 means a plain EOF is expected instead of a positioned error
+ test.T(t, l.Err(), io.EOF)
+ } else if perr, ok := l.Err().(*parse.Error); ok {
+ test.T(t, perr.Col, tt.col)
+ } else {
+ test.Fail(t, "bad error:", l.Err())
+ }
+ break
+ }
+ }
+ })
+ }
+}
+
+////////////////////////////////////////////////////////////////
+
+var J int // package-level sink: the whitespace benchmarks accumulate their skip counts here
+var ss = [][]byte{ // benchmark inputs; every entry ends in a non-whitespace byte, which is what stops the unbounded skip loops
+ []byte(" style"),
+ []byte("style"),
+ []byte(" \r\n\tstyle"),
+ []byte(" style"),
+ []byte(" x"),
+ []byte("x"),
+}
+
+func BenchmarkWhitespace1(b *testing.B) { // whitespace-skip variant 1: if/else with the break in the else branch
+ for i := 0; i < b.N; i++ {
+ for _, s := range ss {
+ j := 0
+ for {
+ if c := s[j]; c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
+ j++
+ } else {
+ break
+ }
+ }
+ J += j
+ }
+ }
+}
+
+func BenchmarkWhitespace2(b *testing.B) { // whitespace-skip variant 2: continue on whitespace, break otherwise
+ for i := 0; i < b.N; i++ {
+ for _, s := range ss {
+ j := 0
+ for {
+ if c := s[j]; c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
+ j++
+ continue
+ }
+ break
+ }
+ J += j
+ }
+ }
+}
+
+func BenchmarkWhitespace3(b *testing.B) { // whitespace-skip variant 3: negated test that breaks on the first non-whitespace byte
+ for i := 0; i < b.N; i++ {
+ for _, s := range ss {
+ j := 0
+ for {
+ if c := s[j]; c != ' ' && c != '\t' && c != '\n' && c != '\r' && c != '\f' {
+ break
+ }
+ j++
+ }
+ J += j
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////
+
+func ExampleNewLexer() { // concatenating the data of every token reproduces the input
+ l := NewLexer(bytes.NewBufferString("<span class='user'>John Doe</span>"))
+ out := ""
+ for {
+ tt, data := l.Next()
+ if tt == ErrorToken {
+ break
+ }
+ out += string(data)
+ }
+ fmt.Println(out)
+ // Output: <span class='user'>John Doe</span>
+}
diff --git a/vendor/github.com/tdewolff/parse/html/util.go b/vendor/github.com/tdewolff/parse/html/util.go
new file mode 100644
index 0000000..c8c3aab
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/html/util.go
@@ -0,0 +1,129 @@
+package html // import "github.com/tdewolff/parse/html"
+
+import "github.com/tdewolff/parse"
+
+var (
+ singleQuoteEntityBytes = []byte("&#39;") // written by EscapeAttrVal in place of ' inside a single-quoted value
+ doubleQuoteEntityBytes = []byte("&#34;") // written by EscapeAttrVal in place of " inside a double-quoted value
+)
+
+var charTable = [256]bool{ // bytes EscapeAttrVal inspects; every marked byte except '&' forces the value to be quoted
+ // ASCII
+ false, false, false, false, false, false, false, false,
+ false, true, true, true, true, true, false, false, // tab, new line, vertical tab, form feed, carriage return
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+
+ true, false, true, false, false, false, true, true, // space, ", &, '
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, true, true, true, false, // <, =, >
+
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+
+ true, false, false, false, false, false, false, false, // `
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+
+ // non-ASCII
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+}
+
+// EscapeAttrVal returns b unchanged when it needs no quotes, orig when its existing quotes can be kept as they are, and otherwise b wrapped in whichever quote character needs fewer escapes, with quote entities decoded or re-encoded to match. The quoted result is built in *buf, which is reallocated when too small.
+func EscapeAttrVal(buf *[]byte, orig, b []byte) []byte {
+ singles := 0
+ doubles := 0
+ unquoted := true
+ entities := false
+ for i, c := range b { // first pass: count quotes and decide whether quoting is needed at all
+ if charTable[c] {
+ if c == '&' {
+ entities = true
+ if quote, n := parse.QuoteEntity(b[i:]); n > 0 { // n > 0 is taken to mean an entity encoding a quote character starts here
+ if quote == '"' {
+ unquoted = false
+ doubles++
+ } else {
+ unquoted = false
+ singles++
+ }
+ }
+ } else {
+ unquoted = false
+ if c == '"' {
+ doubles++
+ } else if c == '\'' {
+ singles++
+ }
+ }
+ }
+ }
+ if unquoted {
+ return b
+ } else if !entities && len(orig) == len(b)+2 && (singles == 0 && orig[0] == '\'' || doubles == 0 && orig[0] == '"') { // orig is b plus quotes that need no escaping inside
+ return orig
+ }
+
+ n := len(b) + 2 // two bytes for the surrounding quotes
+ var quote byte
+ var escapedQuote []byte
+ if doubles > singles { // pick the quote character that occurs less often in the value
+ n += singles * 4 // each escaped quote grows from 1 byte to the 5-byte entity
+ quote = '\''
+ escapedQuote = singleQuoteEntityBytes
+ } else {
+ n += doubles * 4
+ quote = '"'
+ escapedQuote = doubleQuoteEntityBytes
+ }
+ if n > cap(*buf) {
+ *buf = make([]byte, 0, n) // maximum size, not actual size
+ }
+ t := (*buf)[:n] // maximum size, not actual size
+ t[0] = quote
+ j := 1
+ start := 0
+ for i, c := range b { // second pass: copy b, rewriting quotes and quote entities
+ if c == '&' {
+ if entityQuote, n := parse.QuoteEntity(b[i:]); n > 0 { // this n shadows the size above; it is used as the entity's length in b
+ j += copy(t[j:], b[start:i])
+ if entityQuote != quote { // the other quote character can appear literally
+ t[j] = entityQuote
+ j++
+ } else {
+ j += copy(t[j:], escapedQuote)
+ }
+ start = i + n
+ }
+ } else if c == quote {
+ j += copy(t[j:], b[start:i])
+ j += copy(t[j:], escapedQuote)
+ start = i + 1
+ }
+ }
+ j += copy(t[j:], b[start:])
+ t[j] = quote
+ return t[:j+1]
+}
diff --git a/vendor/github.com/tdewolff/parse/html/util_test.go b/vendor/github.com/tdewolff/parse/html/util_test.go
new file mode 100644
index 0000000..3722a08
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/html/util_test.go
@@ -0,0 +1,43 @@
+package html // import "github.com/tdewolff/parse/html"
+
+import (
+ "testing"
+
+ "github.com/tdewolff/test"
+)
+
+func TestEscapeAttrVal(t *testing.T) { // table test: each input is passed both as orig (with quotes) and as b (matching outer quotes stripped)
+ var escapeAttrValTests = []struct {
+ attrVal string
+ expected string
+ }{
+ {"xyz", "xyz"},
+ {"", ""},
+ {"x&amp;z", "x&amp;z"},
+ {"x/z", "x/z"},
+ {"x'z", "\"x'z\""},
+ {"x\"z", "'x\"z'"},
+ {"'x\"z'", "'x\"z'"},
+ {"'x&#39;\"&#39;z'", "\"x'&#34;'z\""},
+ {"\"x&#34;'&#34;z\"", "'x\"&#39;\"z'"},
+ {"\"x&#x27;z\"", "\"x'z\""},
+ {"'x&#x00022;z'", "'x\"z'"},
+ {"'x\"&gt;'", "'x\"&gt;'"},
+ {"You&#039;re encouraged to log in; however, it&#039;s not mandatory. [o]", "\"You're encouraged to log in; however, it's not mandatory. [o]\""},
+ {"a'b=\"\"", "'a&#39;b=\"\"'"},
+ {"x<z", "\"x<z\""},
+ {"'x\"&#39;\"z'", "'x\"&#39;\"z'"},
+ }
+ var buf []byte // shared across cases, so buffer reuse is exercised as well
+ for _, tt := range escapeAttrValTests {
+ t.Run(tt.attrVal, func(t *testing.T) {
+ b := []byte(tt.attrVal)
+ orig := b
+ if len(b) > 1 && (b[0] == '"' || b[0] == '\'') && b[0] == b[len(b)-1] { // strip one pair of matching outer quotes
+ b = b[1 : len(b)-1]
+ }
+ val := EscapeAttrVal(&buf, orig, []byte(b))
+ test.String(t, string(val), tt.expected)
+ })
+ }
+}