summaryrefslogtreecommitdiff
path: root/Command
diff options
context:
space:
mode:
author: Joey Hess <joey@kitenet.net> 2014-07-03 13:46:09 -0400
committer: Joey Hess <joey@kitenet.net> 2014-07-03 14:15:00 -0400
commit: 38e2750137330da8fcc7067a2fa2e5aa74be5125 (patch)
tree: 22b7d238cbe66ab5ed3444b91f448b7de5097dc5 /Command
parent: ff0cf010c169e7d4868994a7eca428496f9e54e6 (diff)
import metadata from feeds
When annex.genmetadata is set, metadata from the feed is added to files that are imported from it. Reused the same field names that are used in --template (feedtitle, itemtitle, feedauthor, itemauthor, etc). Also added title and author, which are the item title/author if available, falling back to the feed title/author. These are more likely to be common metadata fields. (There is a small bit of duplication here, but once git gets around to packing the object, it will compress it away.) The itempubdate field is not included in the metadata as a string; instead it is used to generate year and month fields, same as is done when adding files with annex.genmetadata set. This commit was sponsored by Amitai Schlair, who coincidentally is responsible for ikiwiki generating nice feed metadata!
Diffstat (limited to 'Command')
-rw-r--r-- Command/AddUrl.hs 45
-rw-r--r-- Command/ImportFeed.hs 73
2 files changed, 78 insertions, 40 deletions
diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs
index 7ffb86997..c21ce928f 100644
--- a/Command/AddUrl.hs
+++ b/Command/AddUrl.hs
@@ -97,15 +97,17 @@ performQuvi relaxed pageurl videourl file = ifAnnexed file addurl geturl
where
quviurl = setDownloader pageurl QuviDownloader
addurl key = next $ cleanup quviurl file key Nothing
- geturl = next $ addUrlFileQuvi relaxed quviurl videourl file
+ geturl = next $ isJust <$> addUrlFileQuvi relaxed quviurl videourl file
#endif
#ifdef WITH_QUVI
-addUrlFileQuvi :: Bool -> URLString -> URLString -> FilePath -> Annex Bool
+addUrlFileQuvi :: Bool -> URLString -> URLString -> FilePath -> Annex (Maybe Key)
addUrlFileQuvi relaxed quviurl videourl file = do
key <- Backend.URL.fromUrl quviurl Nothing
ifM (pure relaxed <||> Annex.getState Annex.fast)
- ( cleanup quviurl file key Nothing
+ ( do
+ cleanup' quviurl file key Nothing
+ return (Just key)
, do
{- Get the size, and use that to check
- disk space. However, the size info is not
@@ -113,7 +115,7 @@ addUrlFileQuvi relaxed quviurl videourl file = do
- might change and we want to be able to download
- it later. -}
sizedkey <- addSizeUrlKey videourl key
- prepGetViaTmpChecked sizedkey $ do
+ prepGetViaTmpChecked sizedkey Nothing $ do
tmp <- fromRepo $ gitAnnexTmpObjectLocation key
showOutput
ok <- Transfer.notifyTransfer Transfer.Download (Just file) $
@@ -121,15 +123,17 @@ addUrlFileQuvi relaxed quviurl videourl file = do
liftIO $ createDirectoryIfMissing True (parentDir tmp)
downloadUrl [videourl] tmp
if ok
- then cleanup quviurl file key (Just tmp)
- else return False
+ then do
+ cleanup' quviurl file key (Just tmp)
+ return (Just key)
+ else return Nothing
)
#endif
perform :: Bool -> URLString -> FilePath -> CommandPerform
perform relaxed url file = ifAnnexed file addurl geturl
where
- geturl = next $ addUrlFile relaxed url file
+ geturl = next $ isJust <$> addUrlFile relaxed url file
addurl key
| relaxed = do
setUrlPresent key url
@@ -149,7 +153,7 @@ perform relaxed url file = ifAnnexed file addurl geturl
stop
)
-addUrlFile :: Bool -> URLString -> FilePath -> Annex Bool
+addUrlFile :: Bool -> URLString -> FilePath -> Annex (Maybe Key)
addUrlFile relaxed url file = do
liftIO $ createDirectoryIfMissing True (parentDir file)
ifM (Annex.getState Annex.fast <||> pure relaxed)
@@ -159,13 +163,13 @@ addUrlFile relaxed url file = do
download url file
)
-download :: URLString -> FilePath -> Annex Bool
+download :: URLString -> FilePath -> Annex (Maybe Key)
download url file = do
{- Generate a dummy key to use for this download, before we can
- examine the file and find its real key. This allows resuming
- downloads, as the dummy key for a given url is stable. -}
dummykey <- addSizeUrlKey url =<< Backend.URL.fromUrl url Nothing
- prepGetViaTmpChecked dummykey $ do
+ prepGetViaTmpChecked dummykey Nothing $ do
tmp <- fromRepo $ gitAnnexTmpObjectLocation dummykey
showOutput
ifM (runtransfer dummykey tmp)
@@ -178,9 +182,11 @@ download url file = do
}
k <- genKey source backend
case k of
- Nothing -> return False
- Just (key, _) -> cleanup url file key (Just tmp)
- , return False
+ Nothing -> return Nothing
+ Just (key, _) -> do
+ cleanup' url file key (Just tmp)
+ return (Just key)
+ , return Nothing
)
where
runtransfer dummykey tmp = Transfer.notifyTransfer Transfer.Download (Just file) $
@@ -200,6 +206,11 @@ addSizeUrlKey url key = do
cleanup :: URLString -> FilePath -> Key -> Maybe FilePath -> Annex Bool
cleanup url file key mtmp = do
+ cleanup' url file key mtmp
+ return True
+
+cleanup' :: URLString -> FilePath -> Key -> Maybe FilePath -> Annex ()
+cleanup' url file key mtmp = do
when (isJust mtmp) $
logStatus key InfoPresent
setUrlPresent key url
@@ -210,9 +221,8 @@ cleanup url file key mtmp = do
- must already exist, so flush the queue. -}
Annex.Queue.flush
maybe noop (moveAnnex key) mtmp
- return True
-nodownload :: Bool -> URLString -> FilePath -> Annex Bool
+nodownload :: Bool -> URLString -> FilePath -> Annex (Maybe Key)
nodownload relaxed url file = do
(exists, size) <- if relaxed
then pure (True, Nothing)
@@ -220,10 +230,11 @@ nodownload relaxed url file = do
if exists
then do
key <- Backend.URL.fromUrl url size
- cleanup url file key Nothing
+ cleanup' url file key Nothing
+ return (Just key)
else do
warning $ "unable to access url: " ++ url
- return False
+ return Nothing
url2file :: URI -> Maybe Int -> Int -> FilePath
url2file url pathdepth pathmax = case pathdepth of
diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs
index 29f2fb148..71cd0dc82 100644
--- a/Command/ImportFeed.hs
+++ b/Command/ImportFeed.hs
@@ -33,6 +33,9 @@ import Annex.Quvi
import qualified Utility.Quvi as Quvi
import Command.AddUrl (addUrlFileQuvi)
#endif
+import Types.MetaData
+import Logs.MetaData
+import Annex.MetaData
def :: [Command]
def = [notBareRepo $ withOptions [templateOption, relaxedOption] $
@@ -165,12 +168,14 @@ performDownload relaxed cache todownload = case location todownload of
Nothing -> return True
Just f -> do
showStart "addurl" f
- ok <- getter f
- if ok
- then do
+ mk <- getter f
+ case mk of
+ Just key -> do
+ whenM (annexGenMetaData <$> Annex.getGitConfig) $
+ addMetaData key $ extractMetaData todownload
showEndOk
return True
- else do
+ Nothing -> do
showEndFail
checkFeedBroken (feedurl todownload)
@@ -198,32 +203,19 @@ performDownload relaxed cache todownload = case location todownload of
( return Nothing
, tryanother
)
-
+
defaultTemplate :: String
defaultTemplate = "${feedtitle}/${itemtitle}${extension}"
{- Generates a filename to use for a feed item by filling out the template.
- The filename may not be unique. -}
feedFile :: Utility.Format.Format -> ToDownload -> String -> FilePath
-feedFile tmpl i extension = Utility.Format.format tmpl $ M.fromList
- [ field "feedtitle" $ getFeedTitle $ feed i
- , fieldMaybe "itemtitle" $ getItemTitle $ item i
- , fieldMaybe "feedauthor" $ getFeedAuthor $ feed i
- , fieldMaybe "itemauthor" $ getItemAuthor $ item i
- , fieldMaybe "itemsummary" $ getItemSummary $ item i
- , fieldMaybe "itemdescription" $ getItemDescription $ item i
- , fieldMaybe "itemrights" $ getItemRights $ item i
- , fieldMaybe "itemid" $ snd <$> getItemId (item i)
- , fieldMaybe "itempubdate" $ pubdate $ item i
- , ("extension", sanitizeFilePath extension)
- ]
+feedFile tmpl i extension = Utility.Format.format tmpl $
+ M.map sanitizeFilePath $ M.fromList $ extractFields i ++
+ [ ("extension", extension)
+ , extractField "itempubdate" [pubdate $ item i]
+ ]
where
- field k v =
- let s = sanitizeFilePath v in
- if null s then (k, "none") else (k, s)
- fieldMaybe k Nothing = (k, "none")
- fieldMaybe k (Just v) = field k v
-
#if MIN_VERSION_feed(0,3,9)
pubdate itm = case getItemPublishDate itm :: Maybe (Maybe UTCTime) of
Just (Just d) -> Just $
@@ -234,6 +226,41 @@ feedFile tmpl i extension = Utility.Format.format tmpl $ M.fromList
pubdate _ = Nothing
#endif
+extractMetaData :: ToDownload -> MetaData
+extractMetaData i = case getItemPublishDate (item i) :: Maybe (Maybe UTCTime) of
+ Just (Just d) -> addDateMetaData d meta
+ _ -> meta
+ where
+ tometa (k, v) = (mkMetaFieldUnchecked k, S.singleton (toMetaValue v))
+ meta = MetaData $ M.fromList $ map tometa $ extractFields i
+
+{- Extract fields from the feed and item, that are both used as metadata,
+ - and to generate the filename. -}
+extractFields :: ToDownload -> [(String, String)]
+extractFields i = map (uncurry extractField)
+ [ ("feedtitle", [feedtitle])
+ , ("itemtitle", [itemtitle])
+ , ("feedauthor", [feedauthor])
+ , ("itemauthor", [itemauthor])
+ , ("itemsummary", [getItemSummary $ item i])
+ , ("itemdescription", [getItemDescription $ item i])
+ , ("itemrights", [getItemRights $ item i])
+ , ("itemid", [snd <$> getItemId (item i)])
+ , ("title", [itemtitle, feedtitle])
+ , ("author", [itemauthor, feedauthor])
+ ]
+ where
+ feedtitle = Just $ getFeedTitle $ feed i
+ itemtitle = getItemTitle $ item i
+ feedauthor = getFeedAuthor $ feed i
+ itemauthor = getItemAuthor $ item i
+
+extractField :: String -> [Maybe String] -> (String, String)
+extractField k [] = (k, "none")
+extractField k (Just v:_)
+ | not (null v) = (k, v)
+extractField k (_:rest) = extractField k rest
+
{- Called when there is a problem with a feed.
- Throws an error if the feed is broken, otherwise shows a warning. -}
feedProblem :: URLString -> String -> Annex ()