about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Frédéric Guillot <fred@miniflux.net> 2019-11-28 21:21:00 -0800
committer Frédéric Guillot <fred@miniflux.net> 2019-11-28 21:33:32 -0800
commit f90e9dfab015a2ebdbbe66334a29477152a948f5 (patch)
tree 58516faa31cf0435db1b9abccdbda62b37ba1ae0
parent c43c9458a95552f0a68084a91c631e9e45713a19 (diff)
Add support of media elements for RSS 2 feeds
-rw-r--r-- reader/rss/parser_test.go 119
-rw-r--r-- reader/rss/rss.go 121
2 files changed, 221 insertions, 19 deletions
diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go
index e604927..af98d7e 100644
--- a/reader/rss/parser_test.go
+++ b/reader/rss/parser_test.go
@@ -652,3 +652,122 @@ func TestParseWithInvalidCharacterEntity(t *testing.T) {
t.Errorf(`Incorrect url, got: %q`, feed.SiteURL)
}
}
+
+// TestParseEntryWithMediaGroup checks the enclosures built for an item that
+// carries a plain <enclosure>, a <media:thumbnail> and a <media:group> of
+// <media:content> elements. The thumbnail is expected first, then the
+// enclosure, then the grouped media contents; file3.torrent appears both as
+// an enclosure and as a media content and is expected only once.
+func TestParseEntryWithMediaGroup(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
+ <channel>
+ <title>My Example Feed</title>
+ <link>http://example.org</link>
+ <item>
+ <title>Example Item</title>
+ <link>http://www.example.org/entries/1</link>
+ <enclosure type="application/x-bittorrent" url="https://example.org/file3.torrent" length="670053113">
+ </enclosure>
+ <media:group>
+ <media:content type="application/x-bittorrent" url="https://example.org/file1.torrent"></media:content>
+ <media:content type="application/x-bittorrent" url="https://example.org/file2.torrent" isDefault="true"></media:content>
+ <media:content type="application/x-bittorrent" url="https://example.org/file3.torrent"></media:content>
+ <media:content type="application/x-bittorrent" url="https://example.org/file4.torrent"></media:content>
+ <media:content type="application/x-bittorrent" url="https://example.org/file5.torrent" fileSize="42"></media:content>
+ <media:rating>nonadult</media:rating>
+ </media:group>
+ <media:thumbnail url="https://example.org/image.jpg" height="122" width="223"></media:thumbnail>
+ </item>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Fatalf, not Errorf: the checks below index feed.Entries[0] and would
+	// panic instead of reporting a failure if no entry had been parsed.
+	if len(feed.Entries) != 1 {
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	// Fatalf here as well: the loop below indexes expectedResults by the
+	// enclosure position, so the lengths must match before going further.
+	if len(feed.Entries[0].Enclosures) != 6 {
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+	}
+
+	expectedResults := []struct {
+		url      string
+		mimeType string
+		size     int64
+	}{
+		{"https://example.org/image.jpg", "image/*", 0},
+		{"https://example.org/file3.torrent", "application/x-bittorrent", 670053113},
+		{"https://example.org/file1.torrent", "application/x-bittorrent", 0},
+		{"https://example.org/file2.torrent", "application/x-bittorrent", 0},
+		{"https://example.org/file4.torrent", "application/x-bittorrent", 0},
+		{"https://example.org/file5.torrent", "application/x-bittorrent", 42},
+	}
+
+	for index, enclosure := range feed.Entries[0].Enclosures {
+		if expectedResults[index].url != enclosure.URL {
+			t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url)
+		}
+
+		if expectedResults[index].mimeType != enclosure.MimeType {
+			t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType)
+		}
+
+		if expectedResults[index].size != enclosure.Size {
+			t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size)
+		}
+	}
+}
+
+// TestParseEntryWithMediaContent checks the enclosures built for an item that
+// has a <media:thumbnail> and two top-level <media:content> elements declaring
+// medium="image" but no type attribute. All three are expected as "image/*"
+// enclosures of size 0, thumbnail first.
+func TestParseEntryWithMediaContent(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
+ <channel>
+ <title>My Example Feed</title>
+ <link>http://example.org</link>
+ <item>
+ <title>Example Item</title>
+ <link>http://www.example.org/entries/1</link>
+ <media:thumbnail url="https://example.org/thumbnail.jpg" />
+ <media:content url="https://example.org/media1.jpg" medium="image">
+ <media:title type="html">Some Title for Media 1</media:title>
+ </media:content>
+ <media:content url="https://example.org/media2.jpg" medium="image" />
+ </item>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Fatalf, not Errorf: the checks below index feed.Entries[0] and would
+	// panic instead of reporting a failure if no entry had been parsed.
+	if len(feed.Entries) != 1 {
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	// Fatalf here as well: the loop below indexes expectedResults by the
+	// enclosure position, so the lengths must match before going further.
+	if len(feed.Entries[0].Enclosures) != 3 {
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+	}
+
+	expectedResults := []struct {
+		url      string
+		mimeType string
+		size     int64
+	}{
+		{"https://example.org/thumbnail.jpg", "image/*", 0},
+		{"https://example.org/media1.jpg", "image/*", 0},
+		{"https://example.org/media2.jpg", "image/*", 0},
+	}
+
+	for index, enclosure := range feed.Entries[0].Enclosures {
+		if expectedResults[index].url != enclosure.URL {
+			t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url)
+		}
+
+		if expectedResults[index].mimeType != enclosure.MimeType {
+			t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType)
+		}
+
+		if expectedResults[index].size != enclosure.Size {
+			t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size)
+		}
+	}
+}
diff --git a/reader/rss/rss.go b/reader/rss/rss.go
index fa6c832..3803798 100644
--- a/reader/rss/rss.go
+++ b/reader/rss/rss.go
@@ -56,20 +56,71 @@ type rssEnclosure struct {
Length string `xml:"length,attr"`
}
+// Size returns the enclosure's length attribute converted to an integer.
+//
+// It returns 0 when the attribute is empty or is not a valid base-10 number
+// that fits in an int64.
+func (enclosure *rssEnclosure) Size() int64 {
+	if enclosure.Length == "" {
+		return 0
+	}
+
+	// Parse with an explicit 64-bit size: a bitSize of 0 means "int", which is
+	// only 32 bits wide on 32-bit platforms and would clamp any length above
+	// 2 GiB even though the result is returned as an int64.
+	size, err := strconv.ParseInt(enclosure.Length, 10, 64)
+	if err != nil {
+		// Malformed or out-of-range lengths are reported as "unknown" rather
+		// than as the clamped value ParseInt returns alongside a range error.
+		return 0
+	}
+	return size
+}
+
type rssItem struct {
- GUID string `xml:"guid"`
- Title string `xml:"title"`
- Links []rssLink `xml:"link"`
- OriginalLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
- CommentLinks []rssCommentLink `xml:"comments"`
- Description string `xml:"description"`
- EncodedContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
- PubDate string `xml:"pubDate"`
- Date string `xml:"http://purl.org/dc/elements/1.1/ date"`
- Authors []rssAuthor `xml:"author"`
- Creator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
- EnclosureLinks []rssEnclosure `xml:"enclosure"`
- OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
+ GUID string `xml:"guid"`
+ Title string `xml:"title"`
+ Links []rssLink `xml:"link"`
+ OriginalLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
+ CommentLinks []rssCommentLink `xml:"comments"`
+ Description string `xml:"description"`
+ EncodedContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
+ PubDate string `xml:"pubDate"`
+ Date string `xml:"http://purl.org/dc/elements/1.1/ date"`
+ Authors []rssAuthor `xml:"author"`
+ Creator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
+ EnclosureLinks []rssEnclosure `xml:"enclosure"`
+ OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
+ MediaGroup []rssMediaGroup `xml:"http://search.yahoo.com/mrss/ group"`
+ MediaContents []rssMediaContent `xml:"http://search.yahoo.com/mrss/ content"`
+ MediaThumbnails []rssMediaThumbnails `xml:"http://search.yahoo.com/mrss/ thumbnail"`
+}
+
+// rssMediaGroup is the decoding target for a "group" element of the
+// http://search.yahoo.com/mrss/ namespace (see rssItem.MediaGroup).
+// MediaList collects the group's child elements named "content".
+type rssMediaGroup struct {
+ MediaList []rssMediaContent `xml:"content"`
+}
+
+// rssMediaContent holds the attributes read from a media "content" element.
+// Every field is kept as the raw attribute string; the MimeType and Size
+// methods turn them into usable values.
+type rssMediaContent struct {
+ // URL is the value of the "url" attribute.
+ URL string `xml:"url,attr"`
+ // Type is the value of the "type" attribute; it may be empty.
+ Type string `xml:"type,attr"`
+ // FileSize is the value of the "fileSize" attribute; it may be empty.
+ FileSize string `xml:"fileSize,attr"`
+ // Medium is the value of the "medium" attribute (for example "image").
+ Medium string `xml:"medium,attr"`
+}
+
+// MimeType returns the MIME type to report for this media content.
+//
+// An explicit type attribute always wins. Without one, a medium of "image",
+// "video" or "audio" is mapped to the wildcard type of that family, and
+// anything else falls back to "application/octet-stream".
+func (mediaContent *rssMediaContent) MimeType() string {
+	if mediaContent.Type != "" {
+		return mediaContent.Type
+	}
+
+	// No declared type: derive a generic one from the medium. Each medium is
+	// listed once (the original switch repeated the "video" case, leaving the
+	// second occurrence unreachable).
+	switch mediaContent.Medium {
+	case "image":
+		return "image/*"
+	case "video":
+		return "video/*"
+	case "audio":
+		return "audio/*"
+	default:
+		return "application/octet-stream"
+	}
+}
+
+// Size returns the media content's fileSize attribute converted to an integer.
+//
+// It returns 0 when the attribute is empty or is not a valid base-10 number
+// that fits in an int64.
+func (mediaContent *rssMediaContent) Size() int64 {
+	if mediaContent.FileSize == "" {
+		return 0
+	}
+
+	// Parse with an explicit 64-bit size: a bitSize of 0 means "int", which is
+	// only 32 bits wide on 32-bit platforms and would clamp any size above
+	// 2 GiB even though the result is returned as an int64.
+	size, err := strconv.ParseInt(mediaContent.FileSize, 10, 64)
+	if err != nil {
+		// Malformed or out-of-range sizes are reported as "unknown" rather
+		// than as the clamped value ParseInt returns alongside a range error.
+		return 0
+	}
+	return size
+}
+
+// rssMediaThumbnails is the decoding target for a "thumbnail" element of the
+// http://search.yahoo.com/mrss/ namespace. Despite the plural name, one value
+// describes a single element: rssItem stores them as a slice.
+type rssMediaThumbnails struct {
+ // URL is the value of the "url" attribute.
+ URL string `xml:"url,attr"`
 }
func (r *rssFeed) SiteURL() string {
@@ -200,9 +251,20 @@ func (r *rssItem) URL() string {
func (r *rssItem) Enclosures() model.EnclosureList {
enclosures := make(model.EnclosureList, 0)
+ duplicates := make(map[string]bool, 0)
+
+ for _, mediaThumbnail := range r.MediaThumbnails {
+ if _, found := duplicates[mediaThumbnail.URL]; !found {
+ duplicates[mediaThumbnail.URL] = true
+ enclosures = append(enclosures, &model.Enclosure{
+ URL: mediaThumbnail.URL,
+ MimeType: "image/*",
+ Size: 0,
+ })
+ }
+ }
for _, enclosure := range r.EnclosureLinks {
- length, _ := strconv.ParseInt(enclosure.Length, 10, 0)
enclosureURL := enclosure.URL
if r.OrigEnclosureLink != "" {
@@ -212,11 +274,32 @@ func (r *rssItem) Enclosures() model.EnclosureList {
}
}
- enclosures = append(enclosures, &model.Enclosure{
- URL: enclosureURL,
- MimeType: enclosure.Type,
- Size: length,
- })
+ if _, found := duplicates[enclosureURL]; !found {
+ duplicates[enclosureURL] = true
+
+ enclosures = append(enclosures, &model.Enclosure{
+ URL: enclosureURL,
+ MimeType: enclosure.Type,
+ Size: enclosure.Size(),
+ })
+ }
+ }
+
+ for _, mediaContentItem := range r.MediaGroup {
+ for _, mediaContent := range mediaContentItem.MediaList {
+ r.MediaContents = append(r.MediaContents, mediaContent)
+ }
+ }
+
+ for _, mediaContent := range r.MediaContents {
+ if _, found := duplicates[mediaContent.URL]; !found {
+ duplicates[mediaContent.URL] = true
+ enclosures = append(enclosures, &model.Enclosure{
+ URL: mediaContent.URL,
+ MimeType: mediaContent.MimeType(),
+ Size: mediaContent.Size(),
+ })
+ }
}
return enclosures