summaryrefslogtreecommitdiff
path: root/Command
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2013-07-28 15:27:36 -0400
committerGravatar Joey Hess <joey@kitenet.net>2013-07-28 16:55:42 -0400
commit217068206893864ed05911c3b06d8fdb802750a1 (patch)
tree3bfa87ee63197405f3aa18138eb8affd3ce7c7e7 /Command
parent468e2c789371f924ec4586148e4e9e5618c58303 (diff)
importfeed: git-annex becomes a podcatcher in 150 LOC
Diffstat (limited to 'Command')
-rw-r--r--Command/AddUrl.hs54
-rw-r--r--Command/ImportFeed.hs148
-rw-r--r--Command/Indirect.hs2
3 files changed, 181 insertions, 23 deletions
diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs
index f45d00cc6..5c8c224f2 100644
--- a/Command/AddUrl.hs
+++ b/Command/AddUrl.hs
@@ -1,6 +1,6 @@
{- git-annex command
-
- - Copyright 2011 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -61,10 +61,7 @@ start relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s
perform :: Bool -> String -> FilePath -> CommandPerform
perform relaxed url file = ifAnnexed file addurl geturl
where
- geturl = do
- liftIO $ createDirectoryIfMissing True (parentDir file)
- ifM (Annex.getState Annex.fast <||> pure relaxed)
- ( nodownload relaxed url file , download url file )
+ geturl = next $ addUrlFile relaxed url file
addurl (key, _backend)
| relaxed = do
setUrlPresent key url
@@ -80,22 +77,35 @@ perform relaxed url file = ifAnnexed file addurl geturl
stop
)
-download :: String -> FilePath -> CommandPerform
+addUrlFile :: Bool -> String -> FilePath -> Annex Bool
+addUrlFile relaxed url file = do
+ liftIO $ createDirectoryIfMissing True (parentDir file)
+ ifM (Annex.getState Annex.fast <||> pure relaxed)
+ ( nodownload relaxed url file
+ , do
+ showAction $ "downloading " ++ url ++ " "
+ download url file
+ )
+
+download :: String -> FilePath -> Annex Bool
download url file = do
- showAction $ "downloading " ++ url ++ " "
dummykey <- genkey
tmp <- fromRepo $ gitAnnexTmpLocation dummykey
- stopUnless (runtransfer dummykey tmp) $ do
- backend <- chooseBackend file
- let source = KeySource
- { keyFilename = file
- , contentLocation = tmp
- , inodeCache = Nothing
- }
- k <- genKey source backend
- case k of
- Nothing -> stop
- Just (key, _) -> next $ cleanup url file key (Just tmp)
+ showOutput
+ ifM (runtransfer dummykey tmp)
+ ( do
+ backend <- chooseBackend file
+ let source = KeySource
+ { keyFilename = file
+ , contentLocation = tmp
+ , inodeCache = Nothing
+ }
+ k <- genKey source backend
+ case k of
+ Nothing -> return False
+ Just (key, _) -> cleanup url file key (Just tmp)
+ , return False
+ )
where
{- Generate a dummy key to use for this download, before we can
- examine the file and find its real key. This allows resuming
@@ -119,7 +129,7 @@ download url file = do
downloadUrl [url] tmp
-cleanup :: String -> FilePath -> Key -> Maybe FilePath -> CommandCleanup
+cleanup :: String -> FilePath -> Key -> Maybe FilePath -> Annex Bool
cleanup url file key mtmp = do
when (isJust mtmp) $
logStatus key InfoPresent
@@ -133,7 +143,7 @@ cleanup url file key mtmp = do
maybe noop (moveAnnex key) mtmp
return True
-nodownload :: Bool -> String -> FilePath -> CommandPerform
+nodownload :: Bool -> String -> FilePath -> Annex Bool
nodownload relaxed url file = do
headers <- getHttpHeaders
(exists, size) <- if relaxed
@@ -142,10 +152,10 @@ nodownload relaxed url file = do
if exists
then do
let key = Backend.URL.fromUrl url size
- next $ cleanup url file key Nothing
+ cleanup url file key Nothing
else do
warning $ "unable to access url: " ++ url
- stop
+ return False
url2file :: URI -> Maybe Int -> FilePath
url2file url pathdepth = case pathdepth of
diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs
new file mode 100644
index 000000000..a4a5cfd4c
--- /dev/null
+++ b/Command/ImportFeed.hs
@@ -0,0 +1,148 @@
+{- git-annex command
+ -
+ - Copyright 2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Command.ImportFeed where
+
+import Text.Feed.Import
+import Text.Feed.Query
+import Text.Feed.Types
+import qualified Data.Set as S
+import qualified Data.Map as M
+import Data.Char
+
+import Common.Annex
+import Command
+import qualified Utility.Url as Url
+import Logs.Web
+import qualified Option
+import qualified Utility.Format
+import Utility.Tmp
+import Command.AddUrl (addUrlFile, relaxedOption)
+
+data ToDownload = ToDownload
+ { feed :: Feed
+ , item :: Item
+ , location :: URLString
+ }
+
+mkToDownload :: Feed -> Item -> Maybe ToDownload
+mkToDownload f i = case getItemEnclosure i of
+ Nothing -> Nothing
+ Just (enclosureurl, _, _) -> Just $ ToDownload f i enclosureurl
+
+def :: [Command]
+def = [notBareRepo $ withOptions [templateOption, relaxedOption] $
+ command "importfeed" (paramRepeating paramUrl) seek
+ SectionCommon "import files from podcast feeds"]
+
+templateOption :: Option
+templateOption = Option.field [] "template" paramFormat "template for filenames"
+
+seek :: [CommandSeek]
+seek = [withField templateOption return $ \tmpl ->
+ withFlag relaxedOption $ \relaxed ->
+ withWords $ start relaxed tmpl]
+
+start :: Bool -> Maybe String -> [URLString] -> CommandStart
+start relaxed opttemplate = go Nothing
+ where
+ go _ [] = stop
+ go cache (url:urls) = do
+ showStart "importfeed" url
+ v <- findEnclosures url
+ if isJust v then showEndOk else showEndFail
+ case v of
+ Just l | not (null l) -> do
+ knownurls <- getknownurls cache
+ mapM_ (downloadEnclosure relaxed template knownurls) l
+ go (Just knownurls) urls
+ _ -> go cache urls
+
+ defaulttemplate = "${feedtitle}/${itemtitle}.${extension}"
+ template = Utility.Format.gen $ fromMaybe defaulttemplate opttemplate
+
+ {- This is expensive, so avoid running it more than once. -}
+ getknownurls (Just cached) = return cached
+ getknownurls Nothing = S.fromList <$> knownUrls
+
+findEnclosures :: URLString -> Annex (Maybe [ToDownload])
+findEnclosures url = go =<< downloadFeed url
+ where
+ go Nothing = do
+ warning $ "failed to parse feed " ++ url
+ return Nothing
+ go (Just f) = return $ Just $
+ mapMaybe (mkToDownload f) (feedItems f)
+
+{- Feeds change, so a feed download cannot be resumed. -}
+downloadFeed :: URLString -> Annex (Maybe Feed)
+downloadFeed url = do
+ showOutput
+ liftIO $ withTmpFile "feed" $ \f h -> do
+ ifM (Url.download url [] [] f)
+ ( parseFeedString <$> hGetContentsStrict h
+ , return Nothing
+ )
+
+{- Avoids downloading any urls that are already known to be associated
+ - with a file in the annex. -}
+downloadEnclosure :: Bool -> Utility.Format.Format -> S.Set URLString -> ToDownload -> Annex ()
+downloadEnclosure relaxed template knownurls enclosure
+ | S.member url knownurls = noop
+ | otherwise = do
+ dest <- liftIO $ feedFile template enclosure
+ showStart "addurl" dest
+ ifM (addUrlFile relaxed url dest)
+ ( showEndOk
+ , showEndFail
+ )
+ where
+ url = location enclosure
+
+{- Generate a unique filename for the feed item by filling
+ - out the template.
+ -
+ - Since each feed url is only downloaded once,
+ - if the file already exists, two items with different urls
+ - has the same title. A number is added to disambiguate.
+ -}
+feedFile :: Utility.Format.Format -> ToDownload -> IO FilePath
+feedFile template i = makeUnique 0 $
+ Utility.Format.format template $ M.fromList
+ [ field "feedtitle" $ getFeedTitle $ feed i
+ , fieldMaybe "itemtitle" $ getItemTitle $ item i
+ , fieldMaybe "feedauthor" $ getFeedAuthor $ feed i
+ , fieldMaybe "itemauthor" $ getItemAuthor $ item i
+ , fieldMaybe "itemsummary" $ getItemSummary $ item i
+ , fieldMaybe "itemdescription" $ getItemDescription $ item i
+ , fieldMaybe "itemrights" $ getItemRights $ item i
+ , fieldMaybe "itemid" $ snd <$> getItemId (item i)
+ , field "extension" $ takeExtension $ location i
+ ]
+ where
+ field k v =
+ let s = map sanitize v in
+ if null s then (k, "none") else (k, s)
+ fieldMaybe k Nothing = (k, "none")
+ fieldMaybe k (Just v) = field k v
+
+ sanitize c
+ | isSpace c || isPunctuation c || c == '/' = '_'
+ | otherwise = c
+
+makeUnique :: Integer -> FilePath -> IO FilePath
+makeUnique n file =
+ ifM (isJust <$> catchMaybeIO (getSymbolicLinkStatus f))
+ ( makeUnique (n + 1) file
+ , return file
+ )
+ where
+ f = if n == 0
+ then file
+ else
+ let (d, base) = splitFileName file
+ in d </> show n ++ "_" ++ base
diff --git a/Command/Indirect.hs b/Command/Indirect.hs
index bf1509944..e63c4cb8a 100644
--- a/Command/Indirect.hs
+++ b/Command/Indirect.hs
@@ -59,7 +59,7 @@ perform = do
setDirect False
top <- fromRepo Git.repoPath
- (l, clean) <- inRepo $ Git.LsFiles.stagedDetails [top]
+ (l, clean) <- inRepo $ Git.LsFiles.stagedOthersDetails [top]
forM_ l go
void $ liftIO clean
next cleanup