aboutsummaryrefslogtreecommitdiffhomepage
path: root/http
diff options
context:
space:
mode:
Diffstat (limited to 'http')
-rw-r--r-- http/client.go 3
-rw-r--r-- http/response.go 19
2 files changed, 19 insertions, 3 deletions
diff --git a/http/client.go b/http/client.go
index cdff0ce..b8541a2 100644
--- a/http/client.go
+++ b/http/client.go
@@ -95,11 +95,12 @@ func (c *Client) executeRequest(request *http.Request) (*Response, error) {
ContentLength: resp.ContentLength,
}
- logger.Debug("[HttpClient:%s] OriginalURL=%s, StatusCode=%d, ContentLength=%d, ETag=%s, LastModified=%s, EffectiveURL=%s",
+ logger.Debug("[HttpClient:%s] OriginalURL=%s, StatusCode=%d, ContentLength=%d, ContentType=%s, ETag=%s, LastModified=%s, EffectiveURL=%s",
request.Method,
c.url,
response.StatusCode,
resp.ContentLength,
+ response.ContentType,
response.ETag,
response.LastModified,
response.EffectiveURL,
diff --git a/http/response.go b/http/response.go
index d9e9db6..a0cfc3f 100644
--- a/http/response.go
+++ b/http/response.go
@@ -6,8 +6,10 @@ package http
import (
"io"
+ "mime"
"strings"
+ "github.com/miniflux/miniflux/logger"
"golang.org/x/net/html/charset"
)
@@ -45,9 +47,22 @@ func (r *Response) IsModified(etag, lastModified string) bool {
}
// NormalizeBodyEncoding makes sure the body is encoded in UTF-8.
+//
+// If a charset other than UTF-8 is detected, we convert the document to UTF-8.
+// This is used by the scraper and feed readers.
+//
+// Do not forget edge cases:
+// - Some non-UTF-8 feeds specify the encoding only in the Content-Type header, not in the XML document.
func (r *Response) NormalizeBodyEncoding() (io.Reader, error) {
- if strings.Contains(r.ContentType, "charset=") {
- return charset.NewReader(r.Body, r.ContentType)
+ _, params, err := mime.ParseMediaType(r.ContentType)
+ if err == nil {
+ if enc, found := params["charset"]; found {
+ enc = strings.ToLower(enc)
+ if enc != "utf-8" && enc != "utf8" && enc != "" {
+ logger.Debug("[NormalizeBodyEncoding] Convert body to UTF-8 from %s", enc)
+ return charset.NewReader(r.Body, r.ContentType)
+ }
+ }
}
return r.Body, nil
}