diff options
Diffstat (limited to 'vendor/github.com/tdewolff/parse/html/README.md')
-rw-r--r-- | vendor/github.com/tdewolff/parse/html/README.md | 98 |
1 files changed, 98 insertions, 0 deletions
diff --git a/vendor/github.com/tdewolff/parse/html/README.md b/vendor/github.com/tdewolff/parse/html/README.md new file mode 100644 index 0000000..edca629 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/html/README.md @@ -0,0 +1,98 @@ +# HTML [![GoDoc](http://godoc.org/github.com/tdewolff/parse/html?status.svg)](http://godoc.org/github.com/tdewolff/parse/html) [![GoCover](http://gocover.io/_badge/github.com/tdewolff/parse/html)](http://gocover.io/github.com/tdewolff/parse/html) + +This package is an HTML5 lexer written in [Go][1]. It follows the specification at [The HTML syntax](http://www.w3.org/TR/html5/syntax.html). The lexer takes an io.Reader and converts it into tokens until the EOF. + +## Installation +Run the following command + + go get github.com/tdewolff/parse/html + +or add the following import and run project with `go get` + + import "github.com/tdewolff/parse/html" + +## Lexer +### Usage +The following initializes a new Lexer with io.Reader `r`: +``` go +l := html.NewLexer(r) +``` + +To tokenize until EOF or an error, use: +``` go +for { + tt, data := l.Next() + switch tt { + case html.ErrorToken: + // error or EOF set in l.Err() + return + case html.StartTagToken: + // ... + for { + ttAttr, dataAttr := l.Next() + if ttAttr != html.AttributeToken { + break + } + // ... + } + // ... + } +} +``` + +All tokens: +``` go +ErrorToken TokenType = iota // extra token when errors occur +CommentToken +DoctypeToken +StartTagToken +StartTagCloseToken +StartTagVoidToken +EndTagToken +AttributeToken +TextToken +``` + +### Examples +``` go +package main + +import ( + "os" + + "github.com/tdewolff/parse/html" +) + +// Tokenize HTML from stdin. 
+func main() { + l := html.NewLexer(os.Stdin) + for { + tt, data := l.Next() + switch tt { + case html.ErrorToken: + if l.Err() != io.EOF { + fmt.Println("Error on line", l.Line(), ":", l.Err()) + } + return + case html.StartTagToken: + fmt.Println("Tag", string(data)) + for { + ttAttr, dataAttr := l.Next() + if ttAttr != html.AttributeToken { + break + } + + key := dataAttr + val := l.AttrVal() + fmt.Println("Attribute", string(key), "=", string(val)) + } + // ... + } + } +} +``` + +## License +Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md). + +[1]: http://golang.org/ "Go Language" |