diff options
author | Joey Hess <joey@kitenet.net> | 2013-07-28 15:27:36 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2013-07-28 16:55:42 -0400 |
commit | 217068206893864ed05911c3b06d8fdb802750a1 (patch) | |
tree | 3bfa87ee63197405f3aa18138eb8affd3ce7c7e7 /Command | |
parent | 468e2c789371f924ec4586148e4e9e5618c58303 (diff) |
importfeed: git-annex becomes a podcatcher in 150 LOC
Diffstat (limited to 'Command')
-rw-r--r-- | Command/AddUrl.hs | 54 | ||||
-rw-r--r-- | Command/ImportFeed.hs | 148 | ||||
-rw-r--r-- | Command/Indirect.hs | 2 |
3 files changed, 181 insertions, 23 deletions
diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index f45d00cc6..5c8c224f2 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2011 Joey Hess <joey@kitenet.net> + - Copyright 2011-2013 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} @@ -61,10 +61,7 @@ start relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s perform :: Bool -> String -> FilePath -> CommandPerform perform relaxed url file = ifAnnexed file addurl geturl where - geturl = do - liftIO $ createDirectoryIfMissing True (parentDir file) - ifM (Annex.getState Annex.fast <||> pure relaxed) - ( nodownload relaxed url file , download url file ) + geturl = next $ addUrlFile relaxed url file addurl (key, _backend) | relaxed = do setUrlPresent key url @@ -80,22 +77,35 @@ perform relaxed url file = ifAnnexed file addurl geturl stop ) -download :: String -> FilePath -> CommandPerform +addUrlFile :: Bool -> String -> FilePath -> Annex Bool +addUrlFile relaxed url file = do + liftIO $ createDirectoryIfMissing True (parentDir file) + ifM (Annex.getState Annex.fast <||> pure relaxed) + ( nodownload relaxed url file + , do + showAction $ "downloading " ++ url ++ " " + download url file + ) + +download :: String -> FilePath -> Annex Bool download url file = do - showAction $ "downloading " ++ url ++ " " dummykey <- genkey tmp <- fromRepo $ gitAnnexTmpLocation dummykey - stopUnless (runtransfer dummykey tmp) $ do - backend <- chooseBackend file - let source = KeySource - { keyFilename = file - , contentLocation = tmp - , inodeCache = Nothing - } - k <- genKey source backend - case k of - Nothing -> stop - Just (key, _) -> next $ cleanup url file key (Just tmp) + showOutput + ifM (runtransfer dummykey tmp) + ( do + backend <- chooseBackend file + let source = KeySource + { keyFilename = file + , contentLocation = tmp + , inodeCache = Nothing + } + k <- genKey source backend + case k of + Nothing -> return False + Just (key, _) -> cleanup url file key (Just tmp) + , return False + ) where {- Generate a dummy key to use for this download, before we can - examine the file and find its real key. This allows resuming @@ -119,7 +129,7 @@ download url file = do downloadUrl [url] tmp -cleanup :: String -> FilePath -> Key -> Maybe FilePath -> CommandCleanup +cleanup :: String -> FilePath -> Key -> Maybe FilePath -> Annex Bool cleanup url file key mtmp = do when (isJust mtmp) $ logStatus key InfoPresent @@ -133,7 +143,7 @@ cleanup url file key mtmp = do maybe noop (moveAnnex key) mtmp return True -nodownload :: Bool -> String -> FilePath -> CommandPerform +nodownload :: Bool -> String -> FilePath -> Annex Bool nodownload relaxed url file = do headers <- getHttpHeaders (exists, size) <- if relaxed @@ -142,10 +152,10 @@ nodownload relaxed url file = do if exists then do let key = Backend.URL.fromUrl url size - next $ cleanup url file key Nothing + cleanup url file key Nothing else do warning $ "unable to access url: " ++ url - stop + return False url2file :: URI -> Maybe Int -> FilePath url2file url pathdepth = case pathdepth of diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs new file mode 100644 index 000000000..a4a5cfd4c --- /dev/null +++ b/Command/ImportFeed.hs @@ -0,0 +1,148 @@ +{- git-annex command + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.ImportFeed where + +import Text.Feed.Import +import Text.Feed.Query +import Text.Feed.Types +import qualified Data.Set as S +import qualified Data.Map as M +import Data.Char + +import Common.Annex +import Command +import qualified Utility.Url as Url +import Logs.Web +import qualified Option +import qualified Utility.Format +import Utility.Tmp +import Command.AddUrl (addUrlFile, relaxedOption) + +data ToDownload = ToDownload + { feed :: Feed + , item :: Item + , location :: URLString + } + +mkToDownload :: Feed -> Item -> Maybe ToDownload +mkToDownload f i = case getItemEnclosure i of + Nothing -> Nothing + Just (enclosureurl, _, _) -> Just $ ToDownload f i enclosureurl + +def :: [Command] +def = [notBareRepo $ withOptions [templateOption, relaxedOption] $ + command "importfeed" (paramRepeating paramUrl) seek + SectionCommon "import files from podcast feeds"] + +templateOption :: Option +templateOption = Option.field [] "template" paramFormat "template for filenames" + +seek :: [CommandSeek] +seek = [withField templateOption return $ \tmpl -> + withFlag relaxedOption $ \relaxed -> + withWords $ start relaxed tmpl] + +start :: Bool -> Maybe String -> [URLString] -> CommandStart +start relaxed opttemplate = go Nothing + where + go _ [] = stop + go cache (url:urls) = do + showStart "importfeed" url + v <- findEnclosures url + if isJust v then showEndOk else showEndFail + case v of + Just l | not (null l) -> do + knownurls <- getknownurls cache + mapM_ (downloadEnclosure relaxed template knownurls) l + go (Just knownurls) urls + _ -> go cache urls + + defaulttemplate = "${feedtitle}/${itemtitle}.${extension}" + template = Utility.Format.gen $ fromMaybe defaulttemplate opttemplate + + {- This is expensive, so avoid running it more than once. -} + getknownurls (Just cached) = return cached + getknownurls Nothing = S.fromList <$> knownUrls + +findEnclosures :: URLString -> Annex (Maybe [ToDownload]) +findEnclosures url = go =<< downloadFeed url + where + go Nothing = do + warning $ "failed to parse feed " ++ url + return Nothing + go (Just f) = return $ Just $ + mapMaybe (mkToDownload f) (feedItems f) + +{- Feeds change, so a feed download cannot be resumed. -} +downloadFeed :: URLString -> Annex (Maybe Feed) +downloadFeed url = do + showOutput + liftIO $ withTmpFile "feed" $ \f h -> do + ifM (Url.download url [] [] f) + ( parseFeedString <$> hGetContentsStrict h + , return Nothing + ) + +{- Avoids downloading any urls that are already known to be associated + - with a file in the annex. -} +downloadEnclosure :: Bool -> Utility.Format.Format -> S.Set URLString -> ToDownload -> Annex () +downloadEnclosure relaxed template knownurls enclosure + | S.member url knownurls = noop + | otherwise = do + dest <- liftIO $ feedFile template enclosure + showStart "addurl" dest + ifM (addUrlFile relaxed url dest) + ( showEndOk + , showEndFail + ) + where + url = location enclosure + +{- Generate a unique filename for the feed item by filling + - out the template. + - + - Since each feed url is only downloaded once, + - if the file already exists, two items with different urls + - has the same title. A number is added to disambiguate. + -} +feedFile :: Utility.Format.Format -> ToDownload -> IO FilePath +feedFile template i = makeUnique 0 $ + Utility.Format.format template $ M.fromList + [ field "feedtitle" $ getFeedTitle $ feed i + , fieldMaybe "itemtitle" $ getItemTitle $ item i + , fieldMaybe "feedauthor" $ getFeedAuthor $ feed i + , fieldMaybe "itemauthor" $ getItemAuthor $ item i + , fieldMaybe "itemsummary" $ getItemSummary $ item i + , fieldMaybe "itemdescription" $ getItemDescription $ item i + , fieldMaybe "itemrights" $ getItemRights $ item i + , fieldMaybe "itemid" $ snd <$> getItemId (item i) + , field "extension" $ takeExtension $ location i + ] + where + field k v = + let s = map sanitize v in + if null s then (k, "none") else (k, s) + fieldMaybe k Nothing = (k, "none") + fieldMaybe k (Just v) = field k v + + sanitize c + | isSpace c || isPunctuation c || c == '/' = '_' + | otherwise = c + +makeUnique :: Integer -> FilePath -> IO FilePath +makeUnique n file = + ifM (isJust <$> catchMaybeIO (getSymbolicLinkStatus f)) + ( makeUnique (n + 1) file + , return file + ) + where + f = if n == 0 + then file + else + let (d, base) = splitFileName file + in d </> show n ++ "_" ++ base diff --git a/Command/Indirect.hs b/Command/Indirect.hs index bf1509944..e63c4cb8a 100644 --- a/Command/Indirect.hs +++ b/Command/Indirect.hs @@ -59,7 +59,7 @@ perform = do setDirect False top <- fromRepo Git.repoPath - (l, clean) <- inRepo $ Git.LsFiles.stagedDetails [top] + (l, clean) <- inRepo $ Git.LsFiles.stagedOthersDetails [top] forM_ l go void $ liftIO clean next cleanup |