author     Frédéric Guillot <fred@miniflux.net>   2019-12-26 15:26:23 -0800
committer  Frédéric Guillot <fred@miniflux.net>   2019-12-26 15:56:59 -0800
commit     3debf75eb9229144a05701e03ba59408a75dd815
tree       9e9eb6569db3234b514f798d4278b20793b79833
parent     200b1c304b999191a29f36d4122e7aa05481125c
Normalize URL query string before executing HTTP requests
- Make sure query string parameters are encoded
- As opposed to the standard library, do not append an equal sign to query parameters with an empty value
- Strip URL fragments, as Web browsers do
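
A minimal sketch of the intended behavior, using the RequestURI helper introduced in url/url.go below (the input URL is a made-up example that combines the scenarios covered by the new url_test.go):

package main

import (
	"fmt"

	url_helper "miniflux.app/url"
)

func main() {
	// The space in the path gets percent-encoded, the bare "d" parameter
	// keeps no trailing "=", and the "#top" fragment is stripped.
	normalized := url_helper.RequestURI("https://www.example.org/some path?a=b&a=c&d#top")
	fmt.Println(normalized)
	// Expected, per the implementation below:
	// https://www.example.org/some%20path?a=b&a=c&d
}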
-rw-r--r--  http/client/client.go          42
-rw-r--r--  http/client/response.go        19
-rw-r--r--  http/client/response_test.go    4
-rw-r--r--  reader/feed/handler.go          4
-rw-r--r--  reader/parser/parser_test.go    2
-rw-r--r--  reader/subscription/finder.go   4
-rw-r--r--  url/url.go                     57
-rw-r--r--  url/url_test.go                22
8 files changed, 128 insertions, 26 deletions
diff --git a/http/client/client.go b/http/client/client.go
index 175fe9f..d38859a 100644
--- a/http/client/client.go
+++ b/http/client/client.go
@@ -22,6 +22,7 @@ import (
"miniflux.app/errors"
"miniflux.app/logger"
"miniflux.app/timer"
+ url_helper "miniflux.app/url"
"miniflux.app/version"
)
@@ -37,7 +38,8 @@ var (
// Client is a HTTP Client :)
type Client struct {
- url string
+ inputURL string
+ requestURL string
etagHeader string
lastModifiedHeader string
authorizationHeader string
@@ -47,6 +49,18 @@ type Client struct {
Insecure bool
}
+func (c *Client) String() string {
+ return fmt.Sprintf(
+ `InputURL=%q RequestURL=%q ETag=%s LastModified=%q BasicAuth=%v UserAgent=%q`,
+ c.inputURL,
+ c.requestURL,
+ c.etagHeader,
+ c.lastModifiedHeader,
+ c.authorizationHeader != "" || (c.username != "" && c.password != ""),
+ c.userAgent,
+ )
+}
+
// WithCredentials defines the username/password for HTTP Basic authentication.
func (c *Client) WithCredentials(username, password string) *Client {
if username != "" && password != "" {
@@ -115,7 +129,12 @@ func (c *Client) PostJSON(data interface{}) (*Response, error) {
}
func (c *Client) executeRequest(request *http.Request) (*Response, error) {
- defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[HttpClient] url=%s", c.url))
+ defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[HttpClient] inputURL=%s", c.inputURL))
+
+ logger.Debug("[HttpClient:Before] Method=%s %s",
+ request.Method,
+ c.String(),
+ )
client := c.buildClient()
resp, err := client.Do(request)
@@ -162,21 +181,15 @@ func (c *Client) executeRequest(request *http.Request) (*Response, error) {
EffectiveURL: resp.Request.URL.String(),
LastModified: resp.Header.Get("Last-Modified"),
ETag: resp.Header.Get("ETag"),
+ Expires: resp.Header.Get("Expires"),
ContentType: resp.Header.Get("Content-Type"),
ContentLength: resp.ContentLength,
}
- logger.Debug("[HttpClient:%s] URL=%s, EffectiveURL=%s, Code=%d, Length=%d, Type=%s, ETag=%s, LastMod=%s, Expires=%s, Auth=%v",
+ logger.Debug("[HttpClient:After] Method=%s %s; Response => %s",
request.Method,
- c.url,
- response.EffectiveURL,
- response.StatusCode,
- resp.ContentLength,
- response.ContentType,
- response.ETag,
- response.LastModified,
- resp.Header.Get("Expires"),
- c.username != "",
+ c.String(),
+ response,
)
// Ignore caching headers for feeds that do not want any cache.
@@ -190,7 +203,8 @@ func (c *Client) executeRequest(request *http.Request) (*Response, error) {
}
func (c *Client) buildRequest(method string, body io.Reader) (*http.Request, error) {
- request, err := http.NewRequest(method, c.url, body)
+ c.requestURL = url_helper.RequestURI(c.inputURL)
+ request, err := http.NewRequest(method, c.requestURL, body)
if err != nil {
return nil, err
}
@@ -238,5 +252,5 @@ func (c *Client) buildHeaders() http.Header {
// New returns a new HTTP client.
func New(url string) *Client {
- return &Client{url: url, userAgent: DefaultUserAgent, Insecure: false}
+ return &Client{inputURL: url, userAgent: DefaultUserAgent, Insecure: false}
}
diff --git a/http/client/response.go b/http/client/response.go
index e62b5ab..122c40c 100644
--- a/http/client/response.go
+++ b/http/client/response.go
@@ -6,6 +6,7 @@ package client // import "miniflux.app/http/client"
import (
"bytes"
+ "fmt"
"io"
"io/ioutil"
"regexp"
@@ -24,10 +25,24 @@ type Response struct {
EffectiveURL string
LastModified string
ETag string
+ Expires string
ContentType string
ContentLength int64
}
+func (r *Response) String() string {
+ return fmt.Sprintf(
+ `StatusCode=%d EffectiveURL=%q LastModified=%q ETag=%s Expires=%s ContentType=%q ContentLength=%d`,
+ r.StatusCode,
+ r.EffectiveURL,
+ r.LastModified,
+ r.ETag,
+ r.Expires,
+ r.ContentType,
+ r.ContentLength,
+ )
+}
+
// IsNotFound returns true if the resource doesn't exists anymore.
func (r *Response) IsNotFound() bool {
return r.StatusCode == 404 || r.StatusCode == 410
@@ -105,8 +120,8 @@ func (r *Response) EnsureUnicodeBody() (err error) {
return err
}
-// String returns the response body as string.
-func (r *Response) String() string {
+// BodyAsString returns the response body as string.
+func (r *Response) BodyAsString() string {
bytes, _ := ioutil.ReadAll(r.Body)
return string(bytes)
}
diff --git a/http/client/response_test.go b/http/client/response_test.go
index 818fd81..6a2d8c2 100644
--- a/http/client/response_test.go
+++ b/http/client/response_test.go
@@ -95,7 +95,7 @@ func TestToString(t *testing.T) {
input := `test`
r := &Response{Body: strings.NewReader(input)}
- if r.String() != input {
+ if r.BodyAsString() != input {
t.Error(`Unexpected ouput`)
}
}
@@ -140,7 +140,7 @@ func TestEnsureUnicodeWithHTMLDocuments(t *testing.T) {
t.Fatalf(`Unicode conversion error for %q - %q: %v`, tc.filename, tc.contentType, parseErr)
}
- isUnicode := utf8.ValidString(r.String())
+ isUnicode := utf8.ValidString(r.BodyAsString())
if isUnicode != tc.convertedToUnicode {
t.Errorf(`Unicode conversion %q - %q, got: %v, expected: %v`,
tc.filename, tc.contentType, isUnicode, tc.convertedToUnicode)
diff --git a/reader/feed/handler.go b/reader/feed/handler.go
index b272ea1..7dd0bc7 100644
--- a/reader/feed/handler.go
+++ b/reader/feed/handler.go
@@ -52,7 +52,7 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool,
return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL)
}
- subscription, parseErr := parser.ParseFeed(response.String())
+ subscription, parseErr := parser.ParseFeed(response.BodyAsString())
if parseErr != nil {
return nil, parseErr
}
@@ -106,7 +106,7 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
logger.Debug("[Handler:RefreshFeed] Feed #%d has been modified", feedID)
- updatedFeed, parseErr := parser.ParseFeed(response.String())
+ updatedFeed, parseErr := parser.ParseFeed(response.BodyAsString())
if parseErr != nil {
originalFeed.WithError(parseErr.Localize(printer))
h.store.UpdateFeedError(originalFeed)
diff --git a/reader/parser/parser_test.go b/reader/parser/parser_test.go
index a5a79a9..ddbacec 100644
--- a/reader/parser/parser_test.go
+++ b/reader/parser/parser_test.go
@@ -191,7 +191,7 @@ func TestDifferentEncodingWithResponse(t *testing.T) {
t.Fatalf(`Encoding error for %q: %v`, tc.filename, encodingErr)
}
- feed, parseErr := ParseFeed(r.String())
+ feed, parseErr := ParseFeed(r.BodyAsString())
if parseErr != nil {
t.Fatalf(`Parsing error for %q - %q: %v`, tc.filename, tc.contentType, parseErr)
}
diff --git a/reader/subscription/finder.go b/reader/subscription/finder.go
index fab8d93..66bbedd 100644
--- a/reader/subscription/finder.go
+++ b/reader/subscription/finder.go
@@ -18,7 +18,7 @@ import (
)
var (
- errUnreadableDoc = "Unable to analyze this page: %v"
+ errUnreadableDoc = "Unable to analyze this page: %v"
)
// FindSubscriptions downloads and try to find one or more subscriptions from an URL.
@@ -31,7 +31,7 @@ func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscr
return nil, err
}
- body := response.String()
+ body := response.BodyAsString()
if format := parser.DetectFeedFormat(body); format != parser.FormatUnknown {
var subscriptions Subscriptions
subscriptions = append(subscriptions, &Subscription{
diff --git a/url/url.go b/url/url.go
index cc35e2f..b02348f 100644
--- a/url/url.go
+++ b/url/url.go
@@ -4,9 +4,12 @@
package url // import "miniflux.app/url"
-import "net/url"
-import "fmt"
-import "strings"
+import (
+ "fmt"
+ "net/url"
+ "sort"
+ "strings"
+)
// AbsoluteURL converts the input URL as absolute URL if necessary.
func AbsoluteURL(baseURL, input string) (string, error) {
@@ -69,3 +72,51 @@ func Domain(websiteURL string) string {
return parsedURL.Host
}
+
+// RequestURI returns the encoded URI to be used in HTTP requests.
+func RequestURI(websiteURL string) string {
+ u, err := url.Parse(websiteURL)
+ if err != nil {
+ return websiteURL
+ }
+
+ queryValues := u.Query()
+ u.RawQuery = "" // Clear RawQuery to make sure it's encoded properly.
+ u.Fragment = "" // Clear fragment because Web browsers strip #fragment before sending the URL to a web server.
+
+ var buf strings.Builder
+ buf.WriteString(u.String())
+
+ if len(queryValues) > 0 {
+ buf.WriteByte('?')
+
+ // Sort keys.
+ keys := make([]string, 0, len(queryValues))
+ for k := range queryValues {
+ keys = append(keys, k)
+ }
+ sort.Strings(keys)
+
+ i := 0
+ for _, key := range keys {
+ keyEscaped := url.QueryEscape(key)
+ values := queryValues[key]
+ for _, value := range values {
+ if i > 0 {
+ buf.WriteByte('&')
+ }
+ buf.WriteString(keyEscaped)
+
+ // As opposed to the standard library, we append the = only if the value is not empty.
+ if value != "" {
+ buf.WriteByte('=')
+ buf.WriteString(url.QueryEscape(value))
+ }
+
+ i++
+ }
+ }
+ }
+
+ return buf.String()
+}
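
For comparison, a small sketch of what the standard library does with the same kind of query string; net/url's Values.Encode always writes the equal sign, which is exactly what RequestURI avoids for parameters with an empty value:

package main

import (
	"fmt"
	"net/url"
)

func main() {
	u, _ := url.Parse("https://www.example.org/path?a=b&a=c&d")

	// Values.Encode sorts the keys and always emits key=value pairs,
	// so the bare "d" parameter comes back as "d=".
	fmt.Println(u.Query().Encode())
	// Output: a=b&a=c&d=
}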
diff --git a/url/url_test.go b/url/url_test.go
index 9a14b20..54868a9 100644
--- a/url/url_test.go
+++ b/url/url_test.go
@@ -71,3 +71,25 @@ func TestDomain(t *testing.T) {
}
}
}
+
+func TestRequestURI(t *testing.T) {
+ scenarios := map[string]string{
+ "https://www.example.org": "https://www.example.org",
+ "https://user:password@www.example.org": "https://user:password@www.example.org",
+ "https://www.example.org/path with spaces": "https://www.example.org/path%20with%20spaces",
+ "https://www.example.org/path#test": "https://www.example.org/path",
+ "https://www.example.org/path?abc#test": "https://www.example.org/path?abc",
+ "https://www.example.org/path?a=b&a=c": "https://www.example.org/path?a=b&a=c",
+ "https://www.example.org/path?a=b&a=c&d": "https://www.example.org/path?a=b&a=c&d",
+ "https://www.example.org/path?atom": "https://www.example.org/path?atom",
+ "https://www.example.org/path?测试=测试": "https://www.example.org/path?%E6%B5%8B%E8%AF%95=%E6%B5%8B%E8%AF%95",
+ "https://www.example.org/url=http%3A%2F%2Fwww.example.com%2Ffeed%2F&max=20": "https://www.example.org/url=http%3A%2F%2Fwww.example.com%2Ffeed%2F&max=20",
+ }
+
+ for input, expected := range scenarios {
+ actual := RequestURI(input)
+ if actual != expected {
+ t.Errorf(`Unexpected result, got %q instead of %q`, actual, expected)
+ }
+ }
+}