diff options
author | Frédéric Guillot <fred@miniflux.net> | 2019-11-28 21:21:00 -0800 |
---|---|---|
committer | Frédéric Guillot <fred@miniflux.net> | 2019-11-28 21:33:32 -0800 |
commit | f90e9dfab015a2ebdbbe66334a29477152a948f5 (patch) | |
tree | 58516faa31cf0435db1b9abccdbda62b37ba1ae0 | |
parent | c43c9458a95552f0a68084a91c631e9e45713a19 (diff) |
Add support of media elements for RSS 2 feeds
-rw-r--r-- | reader/rss/parser_test.go | 119 | ||||
-rw-r--r-- | reader/rss/rss.go | 121 |
2 files changed, 221 insertions, 19 deletions
diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go
index e604927..af98d7e 100644
--- a/reader/rss/parser_test.go
+++ b/reader/rss/parser_test.go
@@ -652,3 +652,122 @@ func TestParseWithInvalidCharacterEntity(t *testing.T) {
 		t.Errorf(`Incorrect url, got: %q`, feed.SiteURL)
 	}
 }
+
+func TestParseEntryWithMediaGroup(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
+		<channel>
+		<title>My Example Feed</title>
+		<link>http://example.org</link>
+		<item>
+			<title>Example Item</title>
+			<link>http://www.example.org/entries/1</link>
+			<enclosure type="application/x-bittorrent" url="https://example.org/file3.torrent" length="670053113">
+			</enclosure>
+			<media:group>
+				<media:content type="application/x-bittorrent" url="https://example.org/file1.torrent"></media:content>
+				<media:content type="application/x-bittorrent" url="https://example.org/file2.torrent" isDefault="true"></media:content>
+				<media:content type="application/x-bittorrent" url="https://example.org/file3.torrent"></media:content>
+				<media:content type="application/x-bittorrent" url="https://example.org/file4.torrent"></media:content>
+				<media:content type="application/x-bittorrent" url="https://example.org/file5.torrent" fileSize="42"></media:content>
+				<media:rating>nonadult</media:rating>
+			</media:group>
+			<media:thumbnail url="https://example.org/image.jpg" height="122" width="223"></media:thumbnail>
+		</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+	if len(feed.Entries[0].Enclosures) != 6 {
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+	}
+
+	expectedResults := []struct {
+		url      string
+		mimeType string
+		size     int64
+	}{
+		{"https://example.org/image.jpg", "image/*", 0},
+		{"https://example.org/file3.torrent", "application/x-bittorrent", 670053113},
+		{"https://example.org/file1.torrent", "application/x-bittorrent", 0},
+		{"https://example.org/file2.torrent", "application/x-bittorrent", 0},
+		{"https://example.org/file4.torrent", "application/x-bittorrent", 0},
+		{"https://example.org/file5.torrent", "application/x-bittorrent", 42},
+	}
+
+	for index, enclosure := range feed.Entries[0].Enclosures {
+		if expectedResults[index].url != enclosure.URL {
+			t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url)
+		}
+
+		if expectedResults[index].mimeType != enclosure.MimeType {
+			t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType)
+		}
+
+		if expectedResults[index].size != enclosure.Size {
+			t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size)
+		}
+	}
+}
+
+func TestParseEntryWithMediaContent(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
+		<channel>
+		<title>My Example Feed</title>
+		<link>http://example.org</link>
+		<item>
+			<title>Example Item</title>
+			<link>http://www.example.org/entries/1</link>
+			<media:thumbnail url="https://example.org/thumbnail.jpg" />
+			<media:content url="https://example.org/media1.jpg" medium="image">
+				<media:title type="html">Some Title for Media 1</media:title>
+			</media:content>
+			<media:content url="https://example.org/media2.jpg" medium="image" />
+		</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+	if len(feed.Entries[0].Enclosures) != 3 {
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+	}
+
+	expectedResults := []struct {
+		url      string
+		mimeType string
+		size     int64
+	}{
+		{"https://example.org/thumbnail.jpg", "image/*", 0},
+		{"https://example.org/media1.jpg", "image/*", 0},
+		{"https://example.org/media2.jpg", "image/*", 0},
+	}
+
+	for index, enclosure := range feed.Entries[0].Enclosures {
+		if expectedResults[index].url != enclosure.URL {
+			t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url)
+		}
+
+		if expectedResults[index].mimeType != enclosure.MimeType {
+			t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType)
+		}
+
+		if expectedResults[index].size != enclosure.Size {
+			t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size)
+		}
+	}
+}
diff --git a/reader/rss/rss.go b/reader/rss/rss.go
index fa6c832..3803798 100644
--- a/reader/rss/rss.go
+++ b/reader/rss/rss.go
@@ -56,20 +56,71 @@ type rssEnclosure struct {
 	Length string `xml:"length,attr"`
 }
 
+func (enclosure *rssEnclosure) Size() int64 {
+	if enclosure.Length == "" {
+		return 0
+	}
+	size, _ := strconv.ParseInt(enclosure.Length, 10, 0)
+	return size
+}
+
 type rssItem struct {
-	GUID              string           `xml:"guid"`
-	Title             string           `xml:"title"`
-	Links             []rssLink        `xml:"link"`
-	OriginalLink      string           `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
-	CommentLinks      []rssCommentLink `xml:"comments"`
-	Description       string           `xml:"description"`
-	EncodedContent    string           `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
-	PubDate           string           `xml:"pubDate"`
-	Date              string           `xml:"http://purl.org/dc/elements/1.1/ date"`
-	Authors           []rssAuthor      `xml:"author"`
-	Creator           string           `xml:"http://purl.org/dc/elements/1.1/ creator"`
-	EnclosureLinks    []rssEnclosure   `xml:"enclosure"`
-	OrigEnclosureLink string           `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
+	GUID              string               `xml:"guid"`
+	Title             string               `xml:"title"`
+	Links             []rssLink            `xml:"link"`
+	OriginalLink      string               `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
+	CommentLinks      []rssCommentLink     `xml:"comments"`
+	Description       string               `xml:"description"`
+	EncodedContent    string               `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
+	PubDate           string               `xml:"pubDate"`
+	Date              string               `xml:"http://purl.org/dc/elements/1.1/ date"`
+	Authors           []rssAuthor          `xml:"author"`
+	Creator           string               `xml:"http://purl.org/dc/elements/1.1/ creator"`
+	EnclosureLinks    []rssEnclosure       `xml:"enclosure"`
+	OrigEnclosureLink string               `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
+	MediaGroup        []rssMediaGroup      `xml:"http://search.yahoo.com/mrss/ group"`
+	MediaContents     []rssMediaContent    `xml:"http://search.yahoo.com/mrss/ content"`
+	MediaThumbnails   []rssMediaThumbnails `xml:"http://search.yahoo.com/mrss/ thumbnail"`
+}
+
+type rssMediaGroup struct {
+	MediaList []rssMediaContent `xml:"content"`
+}
+
+type rssMediaContent struct {
+	URL      string `xml:"url,attr"`
+	Type     string `xml:"type,attr"`
+	FileSize string `xml:"fileSize,attr"`
+	Medium   string `xml:"medium,attr"`
+}
+
+func (mediaContent *rssMediaContent) MimeType() string {
+	switch {
+	case mediaContent.Type == "" && mediaContent.Medium == "image":
+		return "image/*"
+	case mediaContent.Type == "" && mediaContent.Medium == "video":
+		return "video/*"
+	case mediaContent.Type == "" && mediaContent.Medium == "audio":
+		return "audio/*"
+	case mediaContent.Type == "" && mediaContent.Medium == "video":
+		return "video/*"
+	case mediaContent.Type != "":
+		return mediaContent.Type
+	default:
+		return "application/octet-stream"
+	}
+}
+
+func (mediaContent *rssMediaContent) Size() int64 {
+	if mediaContent.FileSize == "" {
+		return 0
+	}
+	size, _ := strconv.ParseInt(mediaContent.FileSize, 10, 0)
+	return size
+}
+
+type rssMediaThumbnails struct {
+	URL string `xml:"url,attr"`
 }
 
 func (r *rssFeed) SiteURL() string {
@@ -200,9 +251,20 @@ func (r *rssItem) URL() string {
 
 func (r *rssItem) Enclosures() model.EnclosureList {
 	enclosures := make(model.EnclosureList, 0)
+	duplicates := make(map[string]bool, 0)
+
+	for _, mediaThumbnail := range r.MediaThumbnails {
+		if _, found := duplicates[mediaThumbnail.URL]; !found {
+			duplicates[mediaThumbnail.URL] = true
+			enclosures = append(enclosures, &model.Enclosure{
+				URL:      mediaThumbnail.URL,
+				MimeType: "image/*",
+				Size:     0,
+			})
+		}
+	}
 
 	for _, enclosure := range r.EnclosureLinks {
-		length, _ := strconv.ParseInt(enclosure.Length, 10, 0)
 		enclosureURL := enclosure.URL
 
 		if r.OrigEnclosureLink != "" {
@@ -212,11 +274,32 @@ func (r *rssItem) Enclosures() model.EnclosureList {
 			}
 		}
 
-		enclosures = append(enclosures, &model.Enclosure{
-			URL:      enclosureURL,
-			MimeType: enclosure.Type,
-			Size:     length,
-		})
+		if _, found := duplicates[enclosureURL]; !found {
+			duplicates[enclosureURL] = true
+
+			enclosures = append(enclosures, &model.Enclosure{
+				URL:      enclosureURL,
+				MimeType: enclosure.Type,
+				Size:     enclosure.Size(),
+			})
+		}
+	}
+
+	for _, mediaContentItem := range r.MediaGroup {
+		for _, mediaContent := range mediaContentItem.MediaList {
+			r.MediaContents = append(r.MediaContents, mediaContent)
+		}
+	}
+
+	for _, mediaContent := range r.MediaContents {
+		if _, found := duplicates[mediaContent.URL]; !found {
+			duplicates[mediaContent.URL] = true
+			enclosures = append(enclosures, &model.Enclosure{
+				URL:      mediaContent.URL,
+				MimeType: mediaContent.MimeType(),
+				Size:     mediaContent.Size(),
+			})
+		}
 	}
 
 	return enclosures