diff options
author | Joey Hess <joey@kitenet.net> | 2014-07-03 13:46:09 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2014-07-03 14:15:00 -0400 |
commit | 38e2750137330da8fcc7067a2fa2e5aa74be5125 (patch) | |
tree | 22b7d238cbe66ab5ed3444b91f448b7de5097dc5 | |
parent | ff0cf010c169e7d4868994a7eca428496f9e54e6 (diff) |
import metadata from feeds
When annex.genmetadata is set, metadata from the feed is added to files
that are imported from it.
Reused the same feedtitle and itemtitle, feedauthor, itemauthor, etc names
that are used in --template.
Also added title and author, which are the item title/author if available,
falling back to the feed title/author. These are more likely to be common
metadata fields.
(There is a small bit of dupication here, but once git gets
around to packing the object, it will compress it away.)
The itempubdate field is not included in the metadata as a string; instead
it is used to generate year and month fields, same as is done when adding
files with annex.genmetadata set.
This commit was sponsored by Amitai Schlair, who cooincidentially
is responsible for ikiwiki generating nice feed metadata!
-rw-r--r-- | Annex/Content.hs | 8 | ||||
-rw-r--r-- | Annex/MetaData.hs | 19 | ||||
-rw-r--r-- | Command/AddUrl.hs | 45 | ||||
-rw-r--r-- | Command/ImportFeed.hs | 73 | ||||
-rw-r--r-- | Logs/MetaData.hs | 10 | ||||
-rw-r--r-- | debian/changelog | 2 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 10 |
7 files changed, 106 insertions, 61 deletions
diff --git a/Annex/Content.hs b/Annex/Content.hs index aaae595aa..8ad3d5e65 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -218,7 +218,7 @@ getViaTmpUnchecked = finishGetViaTmp (return True) getViaTmpChecked :: Annex Bool -> Key -> (FilePath -> Annex Bool) -> Annex Bool getViaTmpChecked check key action = - prepGetViaTmpChecked key $ + prepGetViaTmpChecked key False $ finishGetViaTmp check key action {- Prepares to download a key via a tmp file, and checks that there is @@ -229,8 +229,8 @@ getViaTmpChecked check key action = - - Wen there's enough free space, runs the download action. -} -prepGetViaTmpChecked :: Key -> Annex Bool -> Annex Bool -prepGetViaTmpChecked key getkey = do +prepGetViaTmpChecked :: Key -> a -> Annex a -> Annex a +prepGetViaTmpChecked key unabletoget getkey = do tmp <- fromRepo $ gitAnnexTmpObjectLocation key e <- liftIO $ doesFileExist tmp @@ -242,7 +242,7 @@ prepGetViaTmpChecked key getkey = do -- The tmp file may not have been left writable when e $ thawContent tmp getkey - , return False + , return unabletoget ) finishGetViaTmp :: Annex Bool -> Key -> (FilePath -> Annex Bool) -> Annex Bool diff --git a/Annex/MetaData.hs b/Annex/MetaData.hs index f382f0ab1..f1b79e3f4 100644 --- a/Annex/MetaData.hs +++ b/Annex/MetaData.hs @@ -7,6 +7,7 @@ module Annex.MetaData ( genMetaData, + addDateMetaData, module X ) where @@ -37,20 +38,18 @@ genMetaData :: Key -> FilePath -> FileStatus -> Annex () genMetaData key file status = do maybe noop (flip copyMetaData key) =<< catKeyFileHEAD file whenM (annexGenMetaData <$> Annex.getGitConfig) $ do - metadata <- getCurrentMetaData key - let metadata' = genMetaData' status metadata - unless (metadata' == emptyMetaData) $ - addMetaData key metadata' + curr <- getCurrentMetaData key + addMetaData key (addDateMetaData mtime curr) + where + mtime = posixSecondsToUTCTime $ realToFrac $ modificationTime status -{- Generates metadata from the FileStatus. +{- Generates metadata for a file's date stamp. - Does not overwrite any existing metadata values. -} -genMetaData' :: FileStatus -> MetaData -> MetaData -genMetaData' status old = MetaData $ M.fromList $ filter isnew +addDateMetaData :: UTCTime -> MetaData -> MetaData +addDateMetaData mtime old = MetaData $ M.fromList $ filter isnew [ (yearMetaField, S.singleton $ toMetaValue $ show y) , (monthMetaField, S.singleton $ toMetaValue $ show m) ] where isnew (f, _) = S.null (currentMetaDataValues f old) - (y, m, _d) = toGregorian $ utctDay $ - posixSecondsToUTCTime $ realToFrac $ - modificationTime status + (y, m, _d) = toGregorian $ utctDay $ mtime diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 7ffb86997..c21ce928f 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -97,15 +97,17 @@ performQuvi relaxed pageurl videourl file = ifAnnexed file addurl geturl where quviurl = setDownloader pageurl QuviDownloader addurl key = next $ cleanup quviurl file key Nothing - geturl = next $ addUrlFileQuvi relaxed quviurl videourl file + geturl = next $ isJust <$> addUrlFileQuvi relaxed quviurl videourl file #endif #ifdef WITH_QUVI -addUrlFileQuvi :: Bool -> URLString -> URLString -> FilePath -> Annex Bool +addUrlFileQuvi :: Bool -> URLString -> URLString -> FilePath -> Annex (Maybe Key) addUrlFileQuvi relaxed quviurl videourl file = do key <- Backend.URL.fromUrl quviurl Nothing ifM (pure relaxed <||> Annex.getState Annex.fast) - ( cleanup quviurl file key Nothing + ( do + cleanup' quviurl file key Nothing + return (Just key) , do {- Get the size, and use that to check - disk space. However, the size info is not @@ -113,7 +115,7 @@ addUrlFileQuvi relaxed quviurl videourl file = do - might change and we want to be able to download - it later. -} sizedkey <- addSizeUrlKey videourl key - prepGetViaTmpChecked sizedkey $ do + prepGetViaTmpChecked sizedkey Nothing $ do tmp <- fromRepo $ gitAnnexTmpObjectLocation key showOutput ok <- Transfer.notifyTransfer Transfer.Download (Just file) $ @@ -121,15 +123,17 @@ addUrlFileQuvi relaxed quviurl videourl file = do liftIO $ createDirectoryIfMissing True (parentDir tmp) downloadUrl [videourl] tmp if ok - then cleanup quviurl file key (Just tmp) - else return False + then do + cleanup' quviurl file key (Just tmp) + return (Just key) + else return Nothing ) #endif perform :: Bool -> URLString -> FilePath -> CommandPerform perform relaxed url file = ifAnnexed file addurl geturl where - geturl = next $ addUrlFile relaxed url file + geturl = next $ isJust <$> addUrlFile relaxed url file addurl key | relaxed = do setUrlPresent key url @@ -149,7 +153,7 @@ perform relaxed url file = ifAnnexed file addurl geturl stop ) -addUrlFile :: Bool -> URLString -> FilePath -> Annex Bool +addUrlFile :: Bool -> URLString -> FilePath -> Annex (Maybe Key) addUrlFile relaxed url file = do liftIO $ createDirectoryIfMissing True (parentDir file) ifM (Annex.getState Annex.fast <||> pure relaxed) @@ -159,13 +163,13 @@ addUrlFile relaxed url file = do download url file ) -download :: URLString -> FilePath -> Annex Bool +download :: URLString -> FilePath -> Annex (Maybe Key) download url file = do {- Generate a dummy key to use for this download, before we can - examine the file and find its real key. This allows resuming - downloads, as the dummy key for a given url is stable. -} dummykey <- addSizeUrlKey url =<< Backend.URL.fromUrl url Nothing - prepGetViaTmpChecked dummykey $ do + prepGetViaTmpChecked dummykey Nothing $ do tmp <- fromRepo $ gitAnnexTmpObjectLocation dummykey showOutput ifM (runtransfer dummykey tmp) @@ -178,9 +182,11 @@ download url file = do } k <- genKey source backend case k of - Nothing -> return False - Just (key, _) -> cleanup url file key (Just tmp) - , return False + Nothing -> return Nothing + Just (key, _) -> do + cleanup' url file key (Just tmp) + return (Just key) + , return Nothing ) where runtransfer dummykey tmp = Transfer.notifyTransfer Transfer.Download (Just file) $ @@ -200,6 +206,11 @@ addSizeUrlKey url key = do cleanup :: URLString -> FilePath -> Key -> Maybe FilePath -> Annex Bool cleanup url file key mtmp = do + cleanup' url file key mtmp + return True + +cleanup' :: URLString -> FilePath -> Key -> Maybe FilePath -> Annex () +cleanup' url file key mtmp = do when (isJust mtmp) $ logStatus key InfoPresent setUrlPresent key url @@ -210,9 +221,8 @@ cleanup url file key mtmp = do - must already exist, so flush the queue. -} Annex.Queue.flush maybe noop (moveAnnex key) mtmp - return True -nodownload :: Bool -> URLString -> FilePath -> Annex Bool +nodownload :: Bool -> URLString -> FilePath -> Annex (Maybe Key) nodownload relaxed url file = do (exists, size) <- if relaxed then pure (True, Nothing) @@ -220,10 +230,11 @@ nodownload relaxed url file = do if exists then do key <- Backend.URL.fromUrl url size - cleanup url file key Nothing + cleanup' url file key Nothing + return (Just key) else do warning $ "unable to access url: " ++ url - return False + return Nothing url2file :: URI -> Maybe Int -> Int -> FilePath url2file url pathdepth pathmax = case pathdepth of diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs index 29f2fb148..71cd0dc82 100644 --- a/Command/ImportFeed.hs +++ b/Command/ImportFeed.hs @@ -33,6 +33,9 @@ import Annex.Quvi import qualified Utility.Quvi as Quvi import Command.AddUrl (addUrlFileQuvi) #endif +import Types.MetaData +import Logs.MetaData +import Annex.MetaData def :: [Command] def = [notBareRepo $ withOptions [templateOption, relaxedOption] $ @@ -165,12 +168,14 @@ performDownload relaxed cache todownload = case location todownload of Nothing -> return True Just f -> do showStart "addurl" f - ok <- getter f - if ok - then do + mk <- getter f + case mk of + Just key -> do + whenM (annexGenMetaData <$> Annex.getGitConfig) $ + addMetaData key $ extractMetaData todownload showEndOk return True - else do + Nothing -> do showEndFail checkFeedBroken (feedurl todownload) @@ -198,32 +203,19 @@ performDownload relaxed cache todownload = case location todownload of ( return Nothing , tryanother ) - + defaultTemplate :: String defaultTemplate = "${feedtitle}/${itemtitle}${extension}" {- Generates a filename to use for a feed item by filling out the template. - The filename may not be unique. -} feedFile :: Utility.Format.Format -> ToDownload -> String -> FilePath -feedFile tmpl i extension = Utility.Format.format tmpl $ M.fromList - [ field "feedtitle" $ getFeedTitle $ feed i - , fieldMaybe "itemtitle" $ getItemTitle $ item i - , fieldMaybe "feedauthor" $ getFeedAuthor $ feed i - , fieldMaybe "itemauthor" $ getItemAuthor $ item i - , fieldMaybe "itemsummary" $ getItemSummary $ item i - , fieldMaybe "itemdescription" $ getItemDescription $ item i - , fieldMaybe "itemrights" $ getItemRights $ item i - , fieldMaybe "itemid" $ snd <$> getItemId (item i) - , fieldMaybe "itempubdate" $ pubdate $ item i - , ("extension", sanitizeFilePath extension) - ] +feedFile tmpl i extension = Utility.Format.format tmpl $ + M.map sanitizeFilePath $ M.fromList $ extractFields i ++ + [ ("extension", extension) + , extractField "itempubdate" [pubdate $ item i] + ] where - field k v = - let s = sanitizeFilePath v in - if null s then (k, "none") else (k, s) - fieldMaybe k Nothing = (k, "none") - fieldMaybe k (Just v) = field k v - #if MIN_VERSION_feed(0,3,9) pubdate itm = case getItemPublishDate itm :: Maybe (Maybe UTCTime) of Just (Just d) -> Just $ @@ -234,6 +226,41 @@ feedFile tmpl i extension = Utility.Format.format tmpl $ M.fromList pubdate _ = Nothing #endif +extractMetaData :: ToDownload -> MetaData +extractMetaData i = case getItemPublishDate (item i) :: Maybe (Maybe UTCTime) of + Just (Just d) -> addDateMetaData d meta + _ -> meta + where + tometa (k, v) = (mkMetaFieldUnchecked k, S.singleton (toMetaValue v)) + meta = MetaData $ M.fromList $ map tometa $ extractFields i + +{- Extract fields from the feed and item, that are both used as metadata, + - and to generate the filename. -} +extractFields :: ToDownload -> [(String, String)] +extractFields i = map (uncurry extractField) + [ ("feedtitle", [feedtitle]) + , ("itemtitle", [itemtitle]) + , ("feedauthor", [feedauthor]) + , ("itemauthor", [itemauthor]) + , ("itemsummary", [getItemSummary $ item i]) + , ("itemdescription", [getItemDescription $ item i]) + , ("itemrights", [getItemRights $ item i]) + , ("itemid", [snd <$> getItemId (item i)]) + , ("title", [itemtitle, feedtitle]) + , ("author", [itemauthor, feedauthor]) + ] + where + feedtitle = Just $ getFeedTitle $ feed i + itemtitle = getItemTitle $ item i + feedauthor = getFeedAuthor $ feed i + itemauthor = getItemAuthor $ item i + +extractField :: String -> [Maybe String] -> (String, String) +extractField k [] = (k, "none") +extractField k (Just v:_) + | not (null v) = (k, v) +extractField k (_:rest) = extractField k rest + {- Called when there is a problem with a feed. - Throws an error if the feed is broken, otherwise shows a warning. -} feedProblem :: URLString -> String -> Annex () diff --git a/Logs/MetaData.hs b/Logs/MetaData.hs index b682ca005..250317090 100644 --- a/Logs/MetaData.hs +++ b/Logs/MetaData.hs @@ -95,10 +95,12 @@ addMetaData k metadata = addMetaData' k metadata =<< liftIO getPOSIXTime - will tend to be generated across the different log files, and so - git will be able to pack the data more efficiently. -} addMetaData' :: Key -> MetaData -> POSIXTime -> Annex () -addMetaData' k (MetaData m) now = Annex.Branch.change (metaDataLogFile k) $ - showLog . simplifyLog - . S.insert (LogEntry now metadata) - . parseLog +addMetaData' k d@(MetaData m) now + | d == emptyMetaData = noop + | otherwise = Annex.Branch.change (metaDataLogFile k) $ + showLog . simplifyLog + . S.insert (LogEntry now metadata) + . parseLog where metadata = MetaData $ M.filterWithKey (\f _ -> not (isLastChangedField f)) m diff --git a/debian/changelog b/debian/changelog index bf0650496..fa8762bde 100644 --- a/debian/changelog +++ b/debian/changelog @@ -11,6 +11,8 @@ git-annex (5.20140614) UNRELEASED; urgency=medium queue flushing than necessary. * Run standalone install process when the assistant is started (was only being run when the webapp was opened). + * importfeed: When annex.genmetadata is set, metadata from the feed + is added to files that are imported from it. -- Joey Hess <joeyh@debian.org> Mon, 16 Jun 2014 11:28:42 -0400 diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 2d273bfe4..a6b2cbb83 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -268,7 +268,7 @@ subdirectories). Use `--template` to control where the files are stored. The default template is '${feedtitle}/${itemtitle}${extension}' - (Other available variables: feedauthor, itemauthor, itemsummary, itemdescription, itemrights, itemid, itempubdate) + (Other available variables: feedauthor, itemauthor, itemsummary, itemdescription, itemrights, itemid, itempubdate, title, author) The `--relaxed` and `--fast` options behave the same as they do in addurl. @@ -1346,8 +1346,12 @@ Here are all the supported configuration settings. * `annex.genmetadata` Set this to `true` to make git-annex automatically generate some metadata - when adding files to the repository. In particular, it stores - year and month metadata, from the file's modification date. + when adding files to the repository. + + In particular, it stores year and month metadata, from the file's + modification date. + + When importfeed is used, it stores additional metadata from the feed. * `annex.queuesize` |