// Source path: root/reader/icon/finder.go
// Blob: c9da1bc02adf81a09df90c72ea704bd63af30e2d
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package icon // import "miniflux.app/reader/icon"

import (
	"encoding/base64"
	"fmt"
	"io"
	"io/ioutil"
	"strings"

	"miniflux.app/crypto"
	"miniflux.app/http/client"
	"miniflux.app/logger"
	"miniflux.app/model"
	"miniflux.app/url"

	"github.com/PuerkitoBio/goquery"
)

// FindIcon tries to find the icon of the given website.
//
// It fetches the page at url.RootURL(websiteURL), asks parseDocument for the
// icon location, and then either decodes that location directly (when it is
// an inline "data:" URL) or downloads it. A nil icon is returned together
// with the error whenever one of these steps fails.
func FindIcon(websiteURL string) (*model.Icon, error) {
	indexURL := url.RootURL(websiteURL)

	httpClient := client.New(indexURL)
	indexPage, err := httpClient.Get()
	if err != nil {
		return nil, fmt.Errorf("unable to download website index page: %v", err)
	}
	if indexPage.HasServerFailure() {
		return nil, fmt.Errorf("unable to download website index page: status=%d", indexPage.StatusCode)
	}

	location, err := parseDocument(indexURL, indexPage.Body)
	if err != nil {
		return nil, err
	}

	// An inline icon is carried by the URL itself, so no second request is made.
	if strings.HasPrefix(location, "data:") {
		return parseImageDataURL(location)
	}

	logger.Debug("[FindIcon] Fetching icon => %s", location)

	// downloadIcon already yields a nil icon alongside any error.
	return downloadIcon(location)
}

// parseDocument extracts the icon URL advertised by an HTML document.
//
// websiteURL is passed to url.AbsoluteURL as the base for the icon reference,
// and data is the HTML to parse. <link> elements are looked up with a fixed
// list of rel selectors, tried in order; the first selector that yields a
// non-blank href wins (when several elements match that selector, the last
// non-blank href is kept). When no such link exists, the result is
// url.RootURL(websiteURL) with "favicon.ico" appended.
//
// An error is returned when the document cannot be parsed or when the href
// cannot be turned into an absolute URL.
func parseDocument(websiteURL string, data io.Reader) (string, error) {
	queries := []string{
		"link[rel='shortcut icon']",
		"link[rel='Shortcut Icon']",
		"link[rel='icon shortcut']",
		"link[rel='icon']",
	}

	doc, err := goquery.NewDocumentFromReader(data)
	if err != nil {
		return "", fmt.Errorf("unable to read document: %v", err)
	}

	var iconURL string
	for _, query := range queries {
		doc.Find(query).Each(func(i int, s *goquery.Selection) {
			href, exists := s.Attr("href")
			if !exists {
				return
			}

			// Whitespace around the attribute value is not part of the URL,
			// and a blank href must not hide a usable one.
			if href = strings.TrimSpace(href); href != "" {
				iconURL = href
			}
		})

		if iconURL != "" {
			break
		}
	}

	if iconURL == "" {
		return url.RootURL(websiteURL) + "favicon.ico", nil
	}

	// Report a resolution failure here instead of handing an unusable URL
	// to the caller.
	absoluteURL, err := url.AbsoluteURL(websiteURL, iconURL)
	if err != nil {
		return "", fmt.Errorf("unable to resolve icon URL %q: %v", iconURL, err)
	}

	return absoluteURL, nil
}

// downloadIcon fetches the resource at iconURL and wraps its body in a
// model.Icon: Content holds the bytes read, Hash is crypto.HashFromBytes of
// those bytes, and MimeType is the response's ContentType.
//
// An error is returned when the request fails, when the response reports a
// server failure, when the body cannot be read, or when the body is empty.
func downloadIcon(iconURL string) (*model.Icon, error) {
	httpClient := client.New(iconURL)
	resp, err := httpClient.Get()
	switch {
	case err != nil:
		return nil, fmt.Errorf("unable to download iconURL: %v", err)
	case resp.HasServerFailure():
		return nil, fmt.Errorf("unable to download icon: status=%d", resp.StatusCode)
	}

	content, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("unable to read downloaded icon: %v", err)
	}
	if len(content) == 0 {
		return nil, fmt.Errorf("downloaded icon is empty, iconURL=%s", iconURL)
	}

	return &model.Icon{
		Hash:     crypto.HashFromBytes(content),
		MimeType: resp.ContentType,
		Content:  content,
	}, nil
}

// parseImageDataURL decodes an inline image of the form
// "<scheme>:<mime type>;base64,<payload>" into a model.Icon.
//
// The mime type is the text between the first ":" and the first ";", the
// encoding is the text between the first ";" and the first ",", and the
// payload is everything after that comma. Only the "base64" encoding and
// mime types starting with "image/" are accepted.
//
// An error is returned when the separators are missing or out of order, when
// the encoding or mime type is not accepted, when the payload is not valid
// standard base64, or when it decodes to zero bytes.
func parseImageDataURL(value string) (*model.Icon, error) {
	colon := strings.Index(value, ":")
	semicolon := strings.Index(value, ";")
	comma := strings.Index(value, ",")

	// All three separators must be present and appear in the order
	// ":" < ";" < ",". Enforcing the order keeps the slice expressions below
	// in range; a value such as "data:image/png,a;b" would otherwise panic.
	if colon <= 0 || semicolon <= colon || comma <= semicolon {
		return nil, fmt.Errorf(`icon: invalid data url "%s"`, value)
	}

	mimeType := value[colon+1 : semicolon]
	encoding := value[semicolon+1 : comma]
	data := value[comma+1:]

	if encoding != "base64" {
		return nil, fmt.Errorf(`icon: unsupported data url encoding "%s"`, value)
	}

	if !strings.HasPrefix(mimeType, "image/") {
		return nil, fmt.Errorf(`icon: invalid mime type "%s"`, mimeType)
	}

	blob, err := base64.StdEncoding.DecodeString(data)
	if err != nil {
		return nil, fmt.Errorf(`icon: invalid data "%s" (%v)`, value, err)
	}

	if len(blob) == 0 {
		return nil, fmt.Errorf(`icon: empty data "%s"`, value)
	}

	icon := &model.Icon{
		Hash:     crypto.HashFromBytes(blob),
		Content:  blob,
		MimeType: mimeType,
	}

	return icon, nil
}