diff options
Diffstat (limited to 'vendor/golang.org/x/net/html')
-rw-r--r-- | vendor/golang.org/x/net/html/atom/gen.go | 712 | ||||
-rw-r--r-- | vendor/golang.org/x/net/html/parse.go | 127 |
2 files changed, 110 insertions, 729 deletions
diff --git a/vendor/golang.org/x/net/html/atom/gen.go b/vendor/golang.org/x/net/html/atom/gen.go deleted file mode 100644 index 5d05278..0000000 --- a/vendor/golang.org/x/net/html/atom/gen.go +++ /dev/null @@ -1,712 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -//go:generate go run gen.go -//go:generate go run gen.go -test - -package main - -import ( - "bytes" - "flag" - "fmt" - "go/format" - "io/ioutil" - "math/rand" - "os" - "sort" - "strings" -) - -// identifier converts s to a Go exported identifier. -// It converts "div" to "Div" and "accept-charset" to "AcceptCharset". -func identifier(s string) string { - b := make([]byte, 0, len(s)) - cap := true - for _, c := range s { - if c == '-' { - cap = true - continue - } - if cap && 'a' <= c && c <= 'z' { - c -= 'a' - 'A' - } - cap = false - b = append(b, byte(c)) - } - return string(b) -} - -var test = flag.Bool("test", false, "generate table_test.go") - -func genFile(name string, buf *bytes.Buffer) { - b, err := format.Source(buf.Bytes()) - if err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) - } - if err := ioutil.WriteFile(name, b, 0644); err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) - } -} - -func main() { - flag.Parse() - - var all []string - all = append(all, elements...) - all = append(all, attributes...) - all = append(all, eventHandlers...) - all = append(all, extra...) - sort.Strings(all) - - // uniq - lists have dups - w := 0 - for _, s := range all { - if w == 0 || all[w-1] != s { - all[w] = s - w++ - } - } - all = all[:w] - - if *test { - var buf bytes.Buffer - fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n") - fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n") - fmt.Fprintln(&buf, "package atom\n") - fmt.Fprintln(&buf, "var testAtomList = []string{") - for _, s := range all { - fmt.Fprintf(&buf, "\t%q,\n", s) - } - fmt.Fprintln(&buf, "}") - - genFile("table_test.go", &buf) - return - } - - // Find hash that minimizes table size. - var best *table - for i := 0; i < 1000000; i++ { - if best != nil && 1<<(best.k-1) < len(all) { - break - } - h := rand.Uint32() - for k := uint(0); k <= 16; k++ { - if best != nil && k >= best.k { - break - } - var t table - if t.init(h, k, all) { - best = &t - break - } - } - } - if best == nil { - fmt.Fprintf(os.Stderr, "failed to construct string table\n") - os.Exit(1) - } - - // Lay out strings, using overlaps when possible. - layout := append([]string{}, all...) - - // Remove strings that are substrings of other strings - for changed := true; changed; { - changed = false - for i, s := range layout { - if s == "" { - continue - } - for j, t := range layout { - if i != j && t != "" && strings.Contains(s, t) { - changed = true - layout[j] = "" - } - } - } - } - - // Join strings where one suffix matches another prefix. - for { - // Find best i, j, k such that layout[i][len-k:] == layout[j][:k], - // maximizing overlap length k. - besti := -1 - bestj := -1 - bestk := 0 - for i, s := range layout { - if s == "" { - continue - } - for j, t := range layout { - if i == j { - continue - } - for k := bestk + 1; k <= len(s) && k <= len(t); k++ { - if s[len(s)-k:] == t[:k] { - besti = i - bestj = j - bestk = k - } - } - } - } - if bestk > 0 { - layout[besti] += layout[bestj][bestk:] - layout[bestj] = "" - continue - } - break - } - - text := strings.Join(layout, "") - - atom := map[string]uint32{} - for _, s := range all { - off := strings.Index(text, s) - if off < 0 { - panic("lost string " + s) - } - atom[s] = uint32(off<<8 | len(s)) - } - - var buf bytes.Buffer - // Generate the Go code. - fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n") - fmt.Fprintln(&buf, "//go:generate go run gen.go\n") - fmt.Fprintln(&buf, "package atom\n\nconst (") - - // compute max len - maxLen := 0 - for _, s := range all { - if maxLen < len(s) { - maxLen = len(s) - } - fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s]) - } - fmt.Fprintln(&buf, ")\n") - - fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0) - fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen) - - fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k) - for i, s := range best.tab { - if s == "" { - continue - } - fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s) - } - fmt.Fprintf(&buf, "}\n") - datasize := (1 << best.k) * 4 - - fmt.Fprintln(&buf, "const atomText =") - textsize := len(text) - for len(text) > 60 { - fmt.Fprintf(&buf, "\t%q +\n", text[:60]) - text = text[60:] - } - fmt.Fprintf(&buf, "\t%q\n\n", text) - - genFile("table.go", &buf) - - fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize) -} - -type byLen []string - -func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) } -func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] } -func (x byLen) Len() int { return len(x) } - -// fnv computes the FNV hash with an arbitrary starting value h. -func fnv(h uint32, s string) uint32 { - for i := 0; i < len(s); i++ { - h ^= uint32(s[i]) - h *= 16777619 - } - return h -} - -// A table represents an attempt at constructing the lookup table. -// The lookup table uses cuckoo hashing, meaning that each string -// can be found in one of two positions. -type table struct { - h0 uint32 - k uint - mask uint32 - tab []string -} - -// hash returns the two hashes for s. -func (t *table) hash(s string) (h1, h2 uint32) { - h := fnv(t.h0, s) - h1 = h & t.mask - h2 = (h >> 16) & t.mask - return -} - -// init initializes the table with the given parameters. -// h0 is the initial hash value, -// k is the number of bits of hash value to use, and -// x is the list of strings to store in the table. -// init returns false if the table cannot be constructed. -func (t *table) init(h0 uint32, k uint, x []string) bool { - t.h0 = h0 - t.k = k - t.tab = make([]string, 1<<k) - t.mask = 1<<k - 1 - for _, s := range x { - if !t.insert(s) { - return false - } - } - return true -} - -// insert inserts s in the table. -func (t *table) insert(s string) bool { - h1, h2 := t.hash(s) - if t.tab[h1] == "" { - t.tab[h1] = s - return true - } - if t.tab[h2] == "" { - t.tab[h2] = s - return true - } - if t.push(h1, 0) { - t.tab[h1] = s - return true - } - if t.push(h2, 0) { - t.tab[h2] = s - return true - } - return false -} - -// push attempts to push aside the entry in slot i. -func (t *table) push(i uint32, depth int) bool { - if depth > len(t.tab) { - return false - } - s := t.tab[i] - h1, h2 := t.hash(s) - j := h1 + h2 - i - if t.tab[j] != "" && !t.push(j, depth+1) { - return false - } - t.tab[j] = s - return true -} - -// The lists of element names and attribute keys were taken from -// https://html.spec.whatwg.org/multipage/indices.html#index -// as of the "HTML Living Standard - Last Updated 16 April 2018" version. - -// "command", "keygen" and "menuitem" have been removed from the spec, -// but are kept here for backwards compatibility. -var elements = []string{ - "a", - "abbr", - "address", - "area", - "article", - "aside", - "audio", - "b", - "base", - "bdi", - "bdo", - "blockquote", - "body", - "br", - "button", - "canvas", - "caption", - "cite", - "code", - "col", - "colgroup", - "command", - "data", - "datalist", - "dd", - "del", - "details", - "dfn", - "dialog", - "div", - "dl", - "dt", - "em", - "embed", - "fieldset", - "figcaption", - "figure", - "footer", - "form", - "h1", - "h2", - "h3", - "h4", - "h5", - "h6", - "head", - "header", - "hgroup", - "hr", - "html", - "i", - "iframe", - "img", - "input", - "ins", - "kbd", - "keygen", - "label", - "legend", - "li", - "link", - "main", - "map", - "mark", - "menu", - "menuitem", - "meta", - "meter", - "nav", - "noscript", - "object", - "ol", - "optgroup", - "option", - "output", - "p", - "param", - "picture", - "pre", - "progress", - "q", - "rp", - "rt", - "ruby", - "s", - "samp", - "script", - "section", - "select", - "slot", - "small", - "source", - "span", - "strong", - "style", - "sub", - "summary", - "sup", - "table", - "tbody", - "td", - "template", - "textarea", - "tfoot", - "th", - "thead", - "time", - "title", - "tr", - "track", - "u", - "ul", - "var", - "video", - "wbr", -} - -// https://html.spec.whatwg.org/multipage/indices.html#attributes-3 -// -// "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup", -// "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec, -// but are kept here for backwards compatibility. -var attributes = []string{ - "abbr", - "accept", - "accept-charset", - "accesskey", - "action", - "allowfullscreen", - "allowpaymentrequest", - "allowusermedia", - "alt", - "as", - "async", - "autocomplete", - "autofocus", - "autoplay", - "challenge", - "charset", - "checked", - "cite", - "class", - "color", - "cols", - "colspan", - "command", - "content", - "contenteditable", - "contextmenu", - "controls", - "coords", - "crossorigin", - "data", - "datetime", - "default", - "defer", - "dir", - "dirname", - "disabled", - "download", - "draggable", - "dropzone", - "enctype", - "for", - "form", - "formaction", - "formenctype", - "formmethod", - "formnovalidate", - "formtarget", - "headers", - "height", - "hidden", - "high", - "href", - "hreflang", - "http-equiv", - "icon", - "id", - "inputmode", - "integrity", - "is", - "ismap", - "itemid", - "itemprop", - "itemref", - "itemscope", - "itemtype", - "keytype", - "kind", - "label", - "lang", - "list", - "loop", - "low", - "manifest", - "max", - "maxlength", - "media", - "mediagroup", - "method", - "min", - "minlength", - "multiple", - "muted", - "name", - "nomodule", - "nonce", - "novalidate", - "open", - "optimum", - "pattern", - "ping", - "placeholder", - "playsinline", - "poster", - "preload", - "radiogroup", - "readonly", - "referrerpolicy", - "rel", - "required", - "reversed", - "rows", - "rowspan", - "sandbox", - "spellcheck", - "scope", - "scoped", - "seamless", - "selected", - "shape", - "size", - "sizes", - "sortable", - "sorted", - "slot", - "span", - "spellcheck", - "src", - "srcdoc", - "srclang", - "srcset", - "start", - "step", - "style", - "tabindex", - "target", - "title", - "translate", - "type", - "typemustmatch", - "updateviacache", - "usemap", - "value", - "width", - "workertype", - "wrap", -} - -// "onautocomplete", "onautocompleteerror", "onmousewheel", -// "onshow" and "onsort" have been removed from the spec, -// but are kept here for backwards compatibility. -var eventHandlers = []string{ - "onabort", - "onautocomplete", - "onautocompleteerror", - "onauxclick", - "onafterprint", - "onbeforeprint", - "onbeforeunload", - "onblur", - "oncancel", - "oncanplay", - "oncanplaythrough", - "onchange", - "onclick", - "onclose", - "oncontextmenu", - "oncopy", - "oncuechange", - "oncut", - "ondblclick", - "ondrag", - "ondragend", - "ondragenter", - "ondragexit", - "ondragleave", - "ondragover", - "ondragstart", - "ondrop", - "ondurationchange", - "onemptied", - "onended", - "onerror", - "onfocus", - "onhashchange", - "oninput", - "oninvalid", - "onkeydown", - "onkeypress", - "onkeyup", - "onlanguagechange", - "onload", - "onloadeddata", - "onloadedmetadata", - "onloadend", - "onloadstart", - "onmessage", - "onmessageerror", - "onmousedown", - "onmouseenter", - "onmouseleave", - "onmousemove", - "onmouseout", - "onmouseover", - "onmouseup", - "onmousewheel", - "onwheel", - "onoffline", - "ononline", - "onpagehide", - "onpageshow", - "onpaste", - "onpause", - "onplay", - "onplaying", - "onpopstate", - "onprogress", - "onratechange", - "onreset", - "onresize", - "onrejectionhandled", - "onscroll", - "onsecuritypolicyviolation", - "onseeked", - "onseeking", - "onselect", - "onshow", - "onsort", - "onstalled", - "onstorage", - "onsubmit", - "onsuspend", - "ontimeupdate", - "ontoggle", - "onunhandledrejection", - "onunload", - "onvolumechange", - "onwaiting", -} - -// extra are ad-hoc values not covered by any of the lists above. -var extra = []string{ - "acronym", - "align", - "annotation", - "annotation-xml", - "applet", - "basefont", - "bgsound", - "big", - "blink", - "center", - "color", - "desc", - "face", - "font", - "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive. - "foreignobject", - "frame", - "frameset", - "image", - "isindex", - "listing", - "malignmark", - "marquee", - "math", - "mglyph", - "mi", - "mn", - "mo", - "ms", - "mtext", - "nobr", - "noembed", - "noframes", - "plaintext", - "prompt", - "public", - "rb", - "rtc", - "spacer", - "strike", - "svg", - "system", - "tt", - "xmp", -} diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go index ca2cb58..992cff2 100644 --- a/vendor/golang.org/x/net/html/parse.go +++ b/vendor/golang.org/x/net/html/parse.go @@ -630,7 +630,16 @@ func inHeadIM(p *parser) bool { p.oe.pop() p.acknowledgeSelfClosingTag() return true - case a.Script, a.Title, a.Noscript, a.Noframes, a.Style: + case a.Noscript: + p.addElement() + if p.scripting { + p.setOriginalIM() + p.im = textIM + } else { + p.im = inHeadNoscriptIM + } + return true + case a.Script, a.Title, a.Noframes, a.Style: p.addElement() p.setOriginalIM() p.im = textIM @@ -692,6 +701,49 @@ func inHeadIM(p *parser) bool { return false } +// 12.2.6.4.5. +func inHeadNoscriptIM(p *parser) bool { + switch p.tok.Type { + case DoctypeToken: + // Ignore the token. + return true + case StartTagToken: + switch p.tok.DataAtom { + case a.Html: + return inBodyIM(p) + case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style: + return inHeadIM(p) + case a.Head, a.Noscript: + // Ignore the token. + return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Noscript, a.Br: + default: + // Ignore the token. + return true + } + case TextToken: + s := strings.TrimLeft(p.tok.Data, whitespace) + if len(s) == 0 { + // It was all whitespace. + return inHeadIM(p) + } + case CommentToken: + return inHeadIM(p) + } + p.oe.pop() + if p.top().DataAtom != a.Head { + panic("html: the new current node will be a head element.") + } + p.im = inHeadIM + if p.tok.DataAtom == a.Noscript { + return true + } + return false +} + // Section 12.2.6.4.6. func afterHeadIM(p *parser) bool { switch p.tok.Type { @@ -901,7 +953,7 @@ func inBodyIM(p *parser) bool { case a.A: for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- { if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A { - p.inBodyEndTagFormatting(a.A) + p.inBodyEndTagFormatting(a.A, "a") p.oe.remove(n) p.afe.remove(n) break @@ -915,7 +967,7 @@ func inBodyIM(p *parser) bool { case a.Nobr: p.reconstructActiveFormattingElements() if p.elementInScope(defaultScope, a.Nobr) { - p.inBodyEndTagFormatting(a.Nobr) + p.inBodyEndTagFormatting(a.Nobr, "nobr") p.reconstructActiveFormattingElements() } p.addFormattingElement() @@ -1123,7 +1175,7 @@ func inBodyIM(p *parser) bool { case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6) case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U: - p.inBodyEndTagFormatting(p.tok.DataAtom) + p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data) case a.Applet, a.Marquee, a.Object: if p.popUntil(defaultScope, p.tok.DataAtom) { p.clearActiveFormattingElements() @@ -1134,7 +1186,7 @@ func inBodyIM(p *parser) bool { case a.Template: return inHeadIM(p) default: - p.inBodyEndTagOther(p.tok.DataAtom) + p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data) } case CommentToken: p.addChild(&Node{ @@ -1161,7 +1213,7 @@ func inBodyIM(p *parser) bool { return true } -func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) { +func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) { // This is the "adoption agency" algorithm, described at // https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency @@ -1183,7 +1235,7 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) { } } if formattingElement == nil { - p.inBodyEndTagOther(tagAtom) + p.inBodyEndTagOther(tagAtom, tagName) return } feIndex := p.oe.index(formattingElement) @@ -1288,9 +1340,17 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) { // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM. // "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign -func (p *parser) inBodyEndTagOther(tagAtom a.Atom) { +func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) { for i := len(p.oe) - 1; i >= 0; i-- { - if p.oe[i].DataAtom == tagAtom { + // Two element nodes have the same tag if they have the same Data (a + // string-typed field). As an optimization, for common HTML tags, each + // Data string is assigned a unique, non-zero DataAtom (a uint32-typed + // field), since integer comparison is faster than string comparison. + // Uncommon (custom) tags get a zero DataAtom. + // + // The if condition here is equivalent to (p.oe[i].Data == tagName). + if (p.oe[i].DataAtom == tagAtom) && + ((tagAtom != 0) || (p.oe[i].Data == tagName)) { p.oe = p.oe[:i] break } @@ -1684,8 +1744,9 @@ func inCellIM(p *parser) bool { return true } // Close the cell and reprocess. - p.popUntil(tableScope, a.Td, a.Th) - p.clearActiveFormattingElements() + if p.popUntil(tableScope, a.Td, a.Th) { + p.clearActiveFormattingElements() + } p.im = inRowIM return false } @@ -2239,6 +2300,33 @@ func (p *parser) parse() error { // // The input is assumed to be UTF-8 encoded. func Parse(r io.Reader) (*Node, error) { + return ParseWithOptions(r) +} + +// ParseFragment parses a fragment of HTML and returns the nodes that were +// found. If the fragment is the InnerHTML for an existing element, pass that +// element in context. +// +// It has the same intricacies as Parse. +func ParseFragment(r io.Reader, context *Node) ([]*Node, error) { + return ParseFragmentWithOptions(r, context) +} + +// ParseOption configures a parser. +type ParseOption func(p *parser) + +// ParseOptionEnableScripting configures the scripting flag. +// https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting +// +// By default, scripting is enabled. +func ParseOptionEnableScripting(enable bool) ParseOption { + return func(p *parser) { + p.scripting = enable + } +} + +// ParseWithOptions is like Parse, with options. +func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) { p := &parser{ tokenizer: NewTokenizer(r), doc: &Node{ @@ -2248,6 +2336,11 @@ func Parse(r io.Reader) (*Node, error) { framesetOK: true, im: initialIM, } + + for _, f := range opts { + f(p) + } + err := p.parse() if err != nil { return nil, err @@ -2255,12 +2348,8 @@ func Parse(r io.Reader) (*Node, error) { return p.doc, nil } -// ParseFragment parses a fragment of HTML and returns the nodes that were -// found. If the fragment is the InnerHTML for an existing element, pass that -// element in context. -// -// It has the same intricacies as Parse. -func ParseFragment(r io.Reader, context *Node) ([]*Node, error) { +// ParseFragmentWithOptions is like ParseFragment, with options. +func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) { contextTag := "" if context != nil { if context.Type != ElementNode { @@ -2284,6 +2373,10 @@ func ParseFragment(r io.Reader, context *Node) ([]*Node, error) { context: context, } + for _, f := range opts { + f(p) + } + root := &Node{ Type: ElementNode, DataAtom: a.Html, |