summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2017-11-29 15:49:05 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2017-11-29 16:40:32 -0400
commit3b3f7512f6d0b91de21f6fcc4aba8897174bc4a8 (patch)
tree44c85fda3d6a35d62b00cca26bf6acaf474133c2
parentb7178922644c813a2cb69c185ca751aa234fa05b (diff)
youtube-dl working
Including resuming and cleanup of incomplete downloads. Still todo: --fast, --relaxed, importfeed, disk reserve checking, quvi code cleanup. This commit was sponsored by Anthony DeRobertis on Patreon.
-rw-r--r--Annex/Content.hs13
-rw-r--r--Annex/Locations.hs2
-rw-r--r--Annex/Notification.hs7
-rw-r--r--Annex/Quvi.hs2
-rw-r--r--Annex/Transfer.hs20
-rw-r--r--Annex/YoutubeDl.hs86
-rw-r--r--Command/AddUrl.hs53
-rw-r--r--Remote/Web.hs12
-rw-r--r--Types/GitConfig.hs4
-rw-r--r--Types/Transfer.hs25
-rw-r--r--doc/git-annex-addurl.mdwn8
-rw-r--r--git-annex.cabal1
12 files changed, 163 insertions, 70 deletions
diff --git a/Annex/Content.hs b/Annex/Content.hs
index 5b11c7eb1..0899a12ab 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -1017,14 +1017,15 @@ pruneTmpWorkDirBefore f action = do
{- Runs an action, passing it a temporary work directory where
- it can write files while receiving the content of a key.
-
- - On exception, the temporary work directory is left, so resumes can
- - use it.
+ - On exception, or when the action returns a Left value,
+ - the temporary work directory is left, so resumes can use it.
-}
-withTmpWorkDir :: Key -> (FilePath -> Annex a) -> Annex a
-withTmpWorkDir key action = withTmp key $ \obj -> do
+withTmpWorkDir :: Key -> (FilePath -> Annex (Either a b)) -> Annex (Either a b)
+withTmpWorkDir key action = do
-- Create the object file if it does not exist. This way,
-- staleKeysPrune only has to look for object files, and can
-- clean up gitAnnexTmpWorkDir for those it finds.
+ obj <- prepTmp key
unlessM (liftIO $ doesFileExist obj) $ do
liftIO $ writeFile obj ""
setAnnexFilePerm obj
@@ -1032,7 +1033,9 @@ withTmpWorkDir key action = withTmp key $ \obj -> do
liftIO $ createDirectoryIfMissing True tmpdir
setAnnexDirPerm tmpdir
res <- action tmpdir
- liftIO $ removeDirectoryRecursive tmpdir
+ case res of
+ Right _ -> liftIO $ removeDirectoryRecursive tmpdir
+ Left _ -> noop
return res
{- Finds items in the first, smaller list, that are not
diff --git a/Annex/Locations.hs b/Annex/Locations.hs
index acae9c079..bb45d8a3e 100644
--- a/Annex/Locations.hs
+++ b/Annex/Locations.hs
@@ -263,7 +263,7 @@ gitAnnexTmpWorkDir :: FilePath -> FilePath
gitAnnexTmpWorkDir p =
let (dir, f) = splitFileName p
-- Using a prefix avoids name conflict with any other keys.
- in dir </> "work." </> f
+ in dir </> "work." ++ f
{- .git/annex/bad/ is used for bad files found during fsck -}
gitAnnexBadDir :: Git.Repo -> FilePath
diff --git a/Annex/Notification.hs b/Annex/Notification.hs
index 6a13d91dd..f3d5006e3 100644
--- a/Annex/Notification.hs
+++ b/Annex/Notification.hs
@@ -39,7 +39,7 @@ instance Transferrable URLString where
{- Wrap around an action that performs a transfer, which may run multiple
- attempts. Displays notification when supported and when the user asked
- for it. -}
-notifyTransfer :: Transferrable t => Direction -> t -> (NotifyWitness -> Annex Bool) -> Annex Bool
+notifyTransfer :: Transferrable t => Observable v => Direction -> t -> (NotifyWitness -> Annex v) -> Annex v
notifyTransfer direction t a = case descTransfrerrable t of
Nothing -> a NotifyWitness
Just desc -> do
@@ -51,12 +51,13 @@ notifyTransfer direction t a = case descTransfrerrable t of
startnotification <- liftIO $ if notifyStart wanted
then Just <$> Notify.notify client (startedTransferNote direction desc)
else pure Nothing
- ok <- a NotifyWitness
+ res <- a NotifyWitness
+ let ok = observeBool res
when (notifyFinish wanted) $ liftIO $ void $ maybe
(Notify.notify client $ finishedTransferNote ok direction desc)
(\n -> Notify.replace client n $ finishedTransferNote ok direction desc)
startnotification
- return ok
+ return res
else a NotifyWitness
#else
a NotifyWitness
diff --git a/Annex/Quvi.hs b/Annex/Quvi.hs
index efc63ca9f..79b9ee6fa 100644
--- a/Annex/Quvi.hs
+++ b/Annex/Quvi.hs
@@ -17,7 +17,7 @@ import Utility.Url
withQuviOptions :: forall a. Query a -> [QuviParams] -> URLString -> Annex a
withQuviOptions a ps url = do
v <- quviVersion
- opts <- map Param . annexQuviOptions <$> Annex.getGitConfig
+ opts <- return []
liftIO $ a v (concatMap (\mkp -> mkp v) ps ++ opts) url
quviSupported :: URLString -> Annex Bool
diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs
index 3fcf1a1b9..ccb5409a7 100644
--- a/Annex/Transfer.hs
+++ b/Annex/Transfer.hs
@@ -5,7 +5,7 @@
- Licensed under the GNU GPL version 3 or higher.
-}
-{-# LANGUAGE CPP, FlexibleInstances, BangPatterns #-}
+{-# LANGUAGE CPP, BangPatterns #-}
module Annex.Transfer (
module X,
@@ -27,7 +27,6 @@ import Annex.Perms
import Utility.Metered
import Annex.LockPool
import Types.Key
-import Types.Remote (Verification(..))
import qualified Types.Remote as Remote
import Types.Concurrency
@@ -35,23 +34,6 @@ import Control.Concurrent
import qualified Data.Map.Strict as M
import Data.Ord
-class Observable a where
- observeBool :: a -> Bool
- observeFailure :: a
-
-instance Observable Bool where
- observeBool = id
- observeFailure = False
-
-instance Observable (Bool, Verification) where
- observeBool = fst
- observeFailure = (False, UnVerified)
-
-instance Observable (Either e Bool) where
- observeBool (Left _) = False
- observeBool (Right b) = b
- observeFailure = Right False
-
upload :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
upload u key f d a _witness = guardHaveUUID u $
runTransfer (Transfer Upload u key) f d a
diff --git a/Annex/YoutubeDl.hs b/Annex/YoutubeDl.hs
new file mode 100644
index 000000000..e646f3fe8
--- /dev/null
+++ b/Annex/YoutubeDl.hs
@@ -0,0 +1,86 @@
+{- youtube-dl integration for git-annex
+ -
+ - Copyright 2017 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.YoutubeDl where
+
+import Annex.Common
+import qualified Annex
+import Annex.Content
+import Utility.Url (URLString)
+
+-- Runs youtube-dl in a work directory, to download a single media file
+-- from the url. Reutrns the path to the media file in the work directory.
+--
+-- If youtube-dl fails without writing any files to the work directory,
+-- or is not installed, returns Right Nothing.
+--
+-- The work directory can contain files from a previous run of youtube-dl
+-- and it will resume. It should not contain any other files though,
+-- and youtube-dl needs to finish up with only one file in the directory
+-- so we know which one it downloaded.
+--
+-- (Note that we can't use --output to specifiy the file to download to,
+-- due to <https://github.com/rg3/youtube-dl/issues/14864>)
+youtubeDl :: URLString -> FilePath -> Annex (Either String (Maybe FilePath))
+youtubeDl url workdir = ifM (liftIO (inPath "youtube-dl") <&&> runcmd)
+ ( do
+ fs <- liftIO $ filterM (doesFileExist) =<< dirContents workdir
+ case fs of
+ (f:[]) -> return (Right (Just f))
+ [] -> return nofiles
+ _ -> return (toomanyfiles fs)
+ , do
+ fs <- liftIO $ filterM (doesFileExist) =<< dirContents workdir
+ if null fs
+ then return (Right Nothing)
+ else return (Left "youtube-dl download is incomplete. Run the command again to resume.")
+ )
+ where
+ nofiles = Left "youtube-dl did not put any media in its work directory, perhaps it's been configured to store files somewhere else?"
+ toomanyfiles fs = Left $ "youtube-dl downloaded multiple media files; git-annex is only able to deal with one per url: " ++ show fs
+ runcmd = do
+ opts <- map Param . annexYoutubeDlOptions <$> Annex.getGitConfig
+ quiet <- commandProgressDisabled
+ let opts' = opts ++
+ [ Param url
+ -- To make youtube-dl only download one file,
+ -- when given a page with a video and a playlist,
+ -- download only the video.
+ , Param "--no-playlist"
+ -- And when given a page with only a playlist,
+ -- download only the first video on the playlist.
+ -- (Assumes the video is somewhat stable, but
+ -- this is the only way to prevent youtube-dl
+ -- from downloading the whole playlist.)
+ , Param "--playlist-items", Param "0"
+ -- TODO --max-filesize
+ ] ++
+ if quiet then [ Param "--quiet" ] else []
+ liftIO $ boolSystem' "youtube-dl" opts' $
+ \p -> p { cwd = Just workdir }
+
+-- Download a media file to a destination,
+youtubeDlTo :: Key -> URLString -> FilePath -> Annex Bool
+youtubeDlTo key url dest = do
+ res <- withTmpWorkDir key $ \workdir -> do
+ dl <- youtubeDl url workdir
+ case dl of
+ Right (Just mediafile) -> do
+ liftIO $ renameFile mediafile dest
+ return (Right True)
+ Right Nothing -> return (Right False)
+ Left msg -> return (Left msg)
+ case res of
+ Left msg -> do
+ warning msg
+ return False
+ Right r -> return r
+
+-- Check if youtube-dl can still find media in an url.
+youtubeDlSupported :: URLString -> Annex (Either String Bool)
+youtubeDlSupported url = liftIO $ catchMsgIO $
+ snd <$> processTranscript "youtube-dl" [ url, "--simulate" ] Nothing
diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs
index da51a6f29..977bd8001 100644
--- a/Command/AddUrl.hs
+++ b/Command/AddUrl.hs
@@ -21,6 +21,7 @@ import Annex.Content
import Annex.Ingest
import Annex.CheckIgnore
import Annex.UUID
+import Annex.YoutubeDl
import Logs.Web
import Types.KeySource
import Types.UrlContents
@@ -291,42 +292,44 @@ addUrlFile relaxed url urlinfo file
downloadWeb :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
downloadWeb url urlinfo file =
- go =<< downloadWith' downloader dummykey webUUID url (AssociatedFile (Just file))
+ go =<< downloadWith' downloader urlkey webUUID url (AssociatedFile (Just file))
where
- dummykey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing
+ urlkey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing
downloader f p = do
showOutput
- downloadUrl dummykey p [url] f
+ downloadUrl urlkey p [url] f
go Nothing = return Nothing
-- If we downloaded a html file, try to use youtube-dl to
-- extract embedded media.
go (Just tmp) = ifM (liftIO $ isHtml <$> readFile tmp)
- ( do
- -- TODO need a directory based on dummykey,
- -- which unused needs to clean up like
- -- it does gitAnnexTmpObjectLocation
- tmpdir <- undefined
- liftIO $ createDirectoryIfMissing True tmpdir
- mf <- youtubeDl url tmpdir
- case mf of
- Just mediafile -> do
- liftIO $ nukeFile tmp
- let mediaurl = setDownloader url YoutubeDownloader
- let key = Backend.URL.fromUrl mediaurl Nothing
- let dest = takeFileName mediafile
- showDestinationFile dest
- cleanup webUUID mediaurl dest key (Just mediafile)
- return (Just key)
- Nothing -> normalfinish tmp
+ ( tryyoutubedl tmp
, normalfinish tmp
)
normalfinish tmp = do
showDestinationFile file
liftIO $ createDirectoryIfMissing True (parentDir file)
finishDownloadWith tmp webUUID url file
-
-youtubeDl :: URLString -> FilePath -> Annex (Maybe FilePath)
-youtubeDl = undefined -- TODO
+ tryyoutubedl tmp = do
+ let mediaurl = setDownloader url YoutubeDownloader
+ let mediakey = Backend.URL.fromUrl mediaurl Nothing
+ res <- withTmpWorkDir mediakey $ \workdir ->
+ Transfer.notifyTransfer Transfer.Download url $
+ Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p -> do
+ dl <- youtubeDl url workdir
+ case dl of
+ Right (Just mediafile) -> do
+ pruneTmpWorkDirBefore tmp (liftIO . nukeFile)
+ let dest = takeFileName mediafile
+ showDestinationFile dest
+ cleanup webUUID mediaurl dest mediakey (Just mediafile)
+ return $ Right $ Just mediakey
+ Right Nothing -> Right <$> normalfinish tmp
+ Left msg -> return $ Left msg
+ case res of
+ Left msg -> do
+ warning msg
+ return Nothing
+ Right r -> return r
showDestinationFile :: FilePath -> Annex ()
showDestinationFile file = do
@@ -388,7 +391,7 @@ cleanup u url file key mtmp = case mtmp of
Nothing -> go
Just tmp -> do
-- Move to final location for large file check.
- liftIO $ renameFile tmp file
+ pruneTmpWorkDirBefore tmp (\_ -> liftIO $ renameFile tmp file)
largematcher <- largeFilesMatcher
large <- checkFileMatcher largematcher file
if large
@@ -407,7 +410,7 @@ cleanup u url file key mtmp = case mtmp of
( do
when (isJust mtmp) $
logStatus key InfoPresent
- , liftIO $ maybe noop nukeFile mtmp
+ , maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . nukeFile)) mtmp
)
-- TODO youtube-dl
diff --git a/Remote/Web.hs b/Remote/Web.hs
index 233c17eb3..5ad66cb15 100644
--- a/Remote/Web.hs
+++ b/Remote/Web.hs
@@ -19,8 +19,7 @@ import Logs.Web
import Annex.UUID
import Utility.Metered
import qualified Annex.Url as Url
-import Annex.Quvi
-import qualified Utility.Quvi as Quvi
+import Annex.YoutubeDl
remote :: RemoteType
remote = RemoteType
@@ -80,9 +79,7 @@ downloadKey key _af dest p = unVerified $ get =<< getWebUrls key
untilTrue urls $ \u -> do
let (u', downloader) = getDownloader u
case downloader of
- QuviDownloader -> do
- flip (downloadUrl key p) dest
- =<< withQuviOptions Quvi.queryLinks [Quvi.httponly, Quvi.quiet] u'
+ YoutubeDownloader -> youtubeDlTo key u' dest
_ -> downloadUrl key p [u'] dest
downloadKeyCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool
@@ -109,8 +106,7 @@ checkKey' key us = firsthit us (Right False) $ \u -> do
let (u', downloader) = getDownloader u
showChecking u'
case downloader of
- QuviDownloader ->
- Right <$> withQuviOptions Quvi.check [Quvi.httponly, Quvi.quiet] u'
+ YoutubeDownloader -> youtubeDlSupported u'
_ -> do
Url.withUrlOptions $ catchMsgIO .
Url.checkBoth u' (keySize key)
@@ -126,4 +122,4 @@ getWebUrls :: Key -> Annex [URLString]
getWebUrls key = filter supported <$> getUrls key
where
supported u = snd (getDownloader u)
- `elem` [WebDownloader, QuviDownloader]
+ `elem` [WebDownloader, YoutubeDownloader]
diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs
index 05b5623a6..9a48ad173 100644
--- a/Types/GitConfig.hs
+++ b/Types/GitConfig.hs
@@ -67,7 +67,7 @@ data GitConfig = GitConfig
, annexSyncContent :: Configurable Bool
, annexDebug :: Bool
, annexWebOptions :: [String]
- , annexQuviOptions :: [String]
+ , annexYoutubeDlOptions :: [String]
, annexAriaTorrentOptions :: [String]
, annexWebDownloadCommand :: Maybe String
, annexCrippledFileSystem :: Bool
@@ -127,7 +127,7 @@ extractGitConfig r = GitConfig
getmaybebool (annex "synccontent")
, annexDebug = getbool (annex "debug") False
, annexWebOptions = getwords (annex "web-options")
- , annexQuviOptions = getwords (annex "quvi-options")
+ , annexYoutubeDlOptions = getwords (annex "youtube-dl-options")
, annexAriaTorrentOptions = getwords (annex "aria-torrent-options")
, annexWebDownloadCommand = getmaybe (annex "web-download-command")
, annexCrippledFileSystem = getbool (annex "crippledfilesystem") False
diff --git a/Types/Transfer.hs b/Types/Transfer.hs
index ade8fc763..73952c56e 100644
--- a/Types/Transfer.hs
+++ b/Types/Transfer.hs
@@ -5,9 +5,12 @@
- Licensed under the GNU GPL version 3 or higher.
-}
+{-# LANGUAGE FlexibleInstances #-}
+
module Types.Transfer where
import Types
+import Types.Remote (Verification(..))
import Utility.PID
import Utility.QuickCheck
@@ -66,3 +69,25 @@ instance Arbitrary TransferInfo where
-- associated file cannot be empty (but can be Nothing)
<*> (AssociatedFile <$> arbitrary `suchThat` (/= Just ""))
<*> arbitrary
+
+class Observable a where
+ observeBool :: a -> Bool
+ observeFailure :: a
+
+instance Observable Bool where
+ observeBool = id
+ observeFailure = False
+
+instance Observable (Bool, Verification) where
+ observeBool = fst
+ observeFailure = (False, UnVerified)
+
+instance Observable (Either e Bool) where
+ observeBool (Left _) = False
+ observeBool (Right b) = b
+ observeFailure = Right False
+
+instance Observable (Either e (Maybe a)) where
+ observeBool (Right (Just _)) = True
+ observeBool _ = False
+ observeFailure = Right Nothing
diff --git a/doc/git-annex-addurl.mdwn b/doc/git-annex-addurl.mdwn
index 1457b26b4..5d68f4012 100644
--- a/doc/git-annex-addurl.mdwn
+++ b/doc/git-annex-addurl.mdwn
@@ -10,8 +10,8 @@ git annex addurl `[url ...]`
Downloads each url to its own file, which is added to the annex.
-When `youtube-dl` is installed, it's used to download videos
-embedded on web pages.
+When `youtube-dl` is installed, it's used to check for a video embedded in
+a web page at the url, and that is added to the annex instead.
Urls to torrent files (including magnet links) will cause the content of
the torrent to be downloaded, using `aria2c`.
@@ -28,10 +28,6 @@ be used to get better filenames.
Avoid immediately downloading the url. The url is still checked
(via HEAD) to verify that it exists, and to get its size if possible.
- When `youtube-dl` is installed, videos embedded on web pages
- will be added. To avoid the extra work of checking for videos,
- add the `--raw` option.
-
* `--relaxed`
Don't immediately download the url, and avoid storing the size of the
diff --git a/git-annex.cabal b/git-annex.cabal
index 780961d88..cccedaee9 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -547,6 +547,7 @@ Executable git-annex
Annex.View.ViewedFile
Annex.Wanted
Annex.WorkTree
+ Annex.YoutubeDl
Assistant
Assistant.Alert
Assistant.Alert.Utility