From f90e9dfab015a2ebdbbe66334a29477152a948f5 Mon Sep 17 00:00:00 2001 From: Frédéric Guillot Date: Thu, 28 Nov 2019 21:21:00 -0800 Subject: Add support of media elements for RSS 2 feeds --- reader/rss/parser_test.go | 119 +++++++++++++++++++++++++++++++++++++++++++++ reader/rss/rss.go | 121 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 221 insertions(+), 19 deletions(-) diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go index e604927..af98d7e 100644 --- a/reader/rss/parser_test.go +++ b/reader/rss/parser_test.go @@ -652,3 +652,122 @@ func TestParseWithInvalidCharacterEntity(t *testing.T) { t.Errorf(`Incorrect url, got: %q`, feed.SiteURL) } } + +func TestParseEntryWithMediaGroup(t *testing.T) { + data := ` + + + My Example Feed + http://example.org + + Example Item + http://www.example.org/entries/1 + + + + + + + + + nonadult + + + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + if len(feed.Entries[0].Enclosures) != 6 { + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + } + + expectedResults := []struct { + url string + mimeType string + size int64 + }{ + {"https://example.org/image.jpg", "image/*", 0}, + {"https://example.org/file3.torrent", "application/x-bittorrent", 670053113}, + {"https://example.org/file1.torrent", "application/x-bittorrent", 0}, + {"https://example.org/file2.torrent", "application/x-bittorrent", 0}, + {"https://example.org/file4.torrent", "application/x-bittorrent", 0}, + {"https://example.org/file5.torrent", "application/x-bittorrent", 42}, + } + + for index, enclosure := range feed.Entries[0].Enclosures { + if expectedResults[index].url != enclosure.URL { + t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url) + } + + if expectedResults[index].mimeType != 
enclosure.MimeType { + t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType) + } + + if expectedResults[index].size != enclosure.Size { + t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size) + } + } +} + +func TestParseEntryWithMediaContent(t *testing.T) { + data := ` + + + My Example Feed + http://example.org + + Example Item + http://www.example.org/entries/1 + + + Some Title for Media 1 + + + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + if len(feed.Entries[0].Enclosures) != 3 { + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + } + + expectedResults := []struct { + url string + mimeType string + size int64 + }{ + {"https://example.org/thumbnail.jpg", "image/*", 0}, + {"https://example.org/media1.jpg", "image/*", 0}, + {"https://example.org/media2.jpg", "image/*", 0}, + } + + for index, enclosure := range feed.Entries[0].Enclosures { + if expectedResults[index].url != enclosure.URL { + t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url) + } + + if expectedResults[index].mimeType != enclosure.MimeType { + t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType) + } + + if expectedResults[index].size != enclosure.Size { + t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size) + } + } +} diff --git a/reader/rss/rss.go b/reader/rss/rss.go index fa6c832..3803798 100644 --- a/reader/rss/rss.go +++ b/reader/rss/rss.go @@ -56,20 +56,71 @@ type rssEnclosure struct { Length string `xml:"length,attr"` } +func (enclosure *rssEnclosure) Size() int64 { + if enclosure.Length == "" { + return 0 + } + size, _ := 
// rssMediaContent holds the attributes decoded from a "content" element of
// the http://search.yahoo.com/mrss/ namespace, either directly inside an item
// or nested inside a media group.
type rssMediaContent struct {
	URL      string `xml:"url,attr"`
	Type     string `xml:"type,attr"`
	FileSize string `xml:"fileSize,attr"`
	Medium   string `xml:"medium,attr"`
}

// MimeType returns the MIME type to advertise for this media element.
//
// An explicit "type" attribute always takes precedence. When it is absent,
// the "medium" attribute is mapped to a wildcard MIME type ("image/*",
// "video/*" or "audio/*"). Any other or missing medium yields
// "application/octet-stream".
func (mediaContent *rssMediaContent) MimeType() string {
	// Check the explicit type once instead of repeating the guard in every
	// case; this also removes the duplicated (unreachable) "video" branch.
	if mediaContent.Type != "" {
		return mediaContent.Type
	}

	switch mediaContent.Medium {
	case "image":
		return "image/*"
	case "video":
		return "video/*"
	case "audio":
		return "audio/*"
	default:
		return "application/octet-stream"
	}
}

// Size returns the "fileSize" attribute parsed as a base-10 integer.
//
// It returns 0 when the attribute is empty or cannot be parsed (malformed or
// out of the int64 range), so callers can treat 0 as "size unknown".
func (mediaContent *rssMediaContent) Size() int64 {
	if mediaContent.FileSize == "" {
		return 0
	}

	// Parse with an explicit 64-bit size: a bitSize of 0 means the platform
	// "int", which overflows above 2 GiB on 32-bit builds even though the
	// result is returned as int64.
	size, err := strconv.ParseInt(mediaContent.FileSize, 10, 64)
	if err != nil {
		// Best effort: a broken attribute must not surface as a bogus
		// (clamped) file size.
		return 0
	}
	return size
}
+ + for _, mediaContent := range r.MediaContents { + if _, found := duplicates[mediaContent.URL]; !found { + duplicates[mediaContent.URL] = true + enclosures = append(enclosures, &model.Enclosure{ + URL: mediaContent.URL, + MimeType: mediaContent.MimeType(), + Size: mediaContent.Size(), + }) + } } return enclosures -- cgit v1.2.3