summaryrefslogtreecommitdiff
path: root/Command/ImportFeed.hs
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2015-03-31 13:29:51 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2015-03-31 13:30:13 -0400
commit23e90f6f3cb5a570e8bf9eb3d2798c27a5ba2537 (patch)
tree6d6eab72231cb43bce52f0b332315b044b91341f /Command/ImportFeed.hs
parent41f7627baee212ce4a4a75126109d9569c790dd0 (diff)
importfeed: Avoid downloading a redundant item from a feed whose guid has been downloaded before, even when the url has changed.
To support this, always store itemid in metadata; before this was only done when annex.genmetadata was set.
Diffstat (limited to 'Command/ImportFeed.hs')
-rw-r--r--Command/ImportFeed.hs49
1 files changed, 40 insertions, 9 deletions
diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs
index 231c921c3..b60627cfe 100644
--- a/Command/ImportFeed.hs
+++ b/Command/ImportFeed.hs
@@ -73,7 +73,7 @@ perform opts cache url = do
v <- findDownloads url
case v of
[] -> do
- feedProblem url "bad feed content"
+ feedProblem url "bad feed content; no enclosures to download"
next $ return True
l -> do
ok <- and <$> mapM (performDownload opts cache) l
@@ -96,21 +96,32 @@ data ToDownload = ToDownload
data DownloadLocation = Enclosure URLString | QuviLink URLString
+type ItemId = String
+
data Cache = Cache
{ knownurls :: S.Set URLString
+ , knownitems :: S.Set ItemId
, template :: Utility.Format.Format
}
getCache :: Maybe String -> Annex Cache
getCache opttemplate = ifM (Annex.getState Annex.force)
- ( ret S.empty
+ ( ret S.empty S.empty
, do
showSideAction "checking known urls"
- ret =<< S.fromList <$> knownUrls
+ (is, us) <- unzip <$> (mapM knownItems =<< knownUrls)
+ ret (S.fromList us) (S.fromList (concat is))
)
where
tmpl = Utility.Format.gen $ fromMaybe defaultTemplate opttemplate
- ret s = return $ Cache s tmpl
+ ret us is = return $ Cache us is tmpl
+
+knownItems :: (Key, URLString) -> Annex ([ItemId], URLString)
+knownItems (k, u) = do
+ itemids <- S.toList . S.filter (/= noneValue) . S.map fromMetaValue
+ . currentMetaDataValues itemIdField
+ <$> getCurrentMetaData k
+ return (itemids, u)
findDownloads :: URLString -> Annex [ToDownload]
findDownloads u = go =<< downloadFeed u
@@ -191,12 +202,18 @@ performDownload opts cache todownload = case location todownload of
where
forced = Annex.getState Annex.force
- {- Avoids downloading any urls that are already known to be
+ {- Avoids downloading any items that are already known to be
- associated with a file in the annex, unless forced. -}
checkknown url a
- | S.member url (knownurls cache) = ifM forced (a, return True)
+ | knownitemid || S.member url (knownurls cache)
+ = ifM forced (a, return True)
| otherwise = a
+ knownitemid = case getItemId (item todownload) of
+ -- only when it's a permalink
+ Just (True, itemid) -> S.member itemid (knownitems cache)
+ _ -> False
+
rundownload url extension getter = do
dest <- makeunique url (1 :: Integer) $
feedFile (template cache) todownload extension
@@ -211,8 +228,10 @@ performDownload opts cache todownload = case location todownload of
checkFeedBroken (feedurl todownload)
else do
forM_ ks $ \key ->
- whenM (annexGenMetaData <$> Annex.getGitConfig) $
- addMetaData key $ extractMetaData todownload
+ ifM (annexGenMetaData <$> Annex.getGitConfig)
+ ( addMetaData key $ extractMetaData todownload
+ , addMetaData key $ minimalMetaData todownload
+ )
showEndOk
return True
@@ -275,6 +294,12 @@ extractMetaData i = meta
tometa (k, v) = (mkMetaFieldUnchecked k, S.singleton (toMetaValue v))
meta = MetaData $ M.fromList $ map tometa $ extractFields i
+minimalMetaData :: ToDownload -> MetaData
+minimalMetaData i = case getItemId (item i) of
+ (Nothing) -> emptyMetaData
+ (Just (_, itemid)) -> MetaData $ M.singleton itemIdField
+ (S.singleton $ toMetaValue itemid)
+
{- Extract fields from the feed and item, that are both used as metadata,
- and to generate the filename. -}
extractFields :: ToDownload -> [(String, String)]
@@ -296,12 +321,18 @@ extractFields i = map (uncurry extractField)
feedauthor = getFeedAuthor $ feed i
itemauthor = getItemAuthor $ item i
+itemIdField :: MetaField
+itemIdField = mkMetaFieldUnchecked "itemid"
+
extractField :: String -> [Maybe String] -> (String, String)
-extractField k [] = (k, "none")
+extractField k [] = (k, noneValue)
extractField k (Just v:_)
| not (null v) = (k, v)
extractField k (_:rest) = extractField k rest
+noneValue :: String
+noneValue = "none"
+
{- Called when there is a problem with a feed.
- Throws an error if the feed is broken, otherwise shows a warning. -}
feedProblem :: URLString -> String -> Annex ()