diff options
74 files changed, 795 insertions, 592 deletions
@@ -64,7 +64,6 @@ import Types.LockCache import Types.DesktopNotify import Types.CleanupActions import qualified Database.Keys.Handle as Keys -import Utility.Quvi (QuviVersion) import Utility.InodeCache import Utility.Url @@ -134,7 +133,6 @@ data AnnexState = AnnexState , errcounter :: Integer , unusedkeys :: Maybe (S.Set Key) , tempurls :: M.Map Key URLString - , quviversion :: Maybe QuviVersion , existinghooks :: M.Map Git.Hook.Hook Bool , desktopnotify :: DesktopNotify , workers :: [Either AnnexState (Async AnnexState)] @@ -190,7 +188,6 @@ newState c r = do , errcounter = 0 , unusedkeys = Nothing , tempurls = M.empty - , quviversion = Nothing , existinghooks = M.empty , desktopnotify = mempty , workers = [] diff --git a/Annex/Content.hs b/Annex/Content.hs index 9661f068a..986f673f6 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -21,6 +21,7 @@ module Annex.Content ( prepTmp, withTmp, checkDiskSpace, + needMoreDiskSpace, moveAnnex, populatePointerFile, linkToAnnex, @@ -42,11 +43,13 @@ module Annex.Content ( dirKeys, withObjectLoc, staleKeysPrune, + pruneTmpWorkDirBefore, isUnmodified, verifyKeyContent, VerifyConfig(..), Verification(..), unVerified, + withTmpWorkDir, ) where import System.IO.Unsafe (unsafeInterleaveIO) @@ -303,7 +306,7 @@ getViaTmp' v key action = do (ok, verification) <- action tmpfile if ok then ifM (verifyKeyContent v verification key tmpfile) - ( ifM (moveAnnex key tmpfile) + ( ifM (pruneTmpWorkDirBefore tmpfile (moveAnnex key)) ( do logStatus key InfoPresent return True @@ -311,7 +314,7 @@ getViaTmp' v key action = do ) , do warning "verification of content failed" - liftIO $ nukeFile tmpfile + pruneTmpWorkDirBefore tmpfile (liftIO . nukeFile) return False ) -- On transfer failure, the tmp file is left behind, in case @@ -386,7 +389,7 @@ prepTmp key = do createAnnexDirectory (parentDir tmp) return tmp -{- Creates a temp file for a key, runs an action on it, and cleans up +{- Prepares a temp file for a key, runs an action on it, and cleans up - the temp file. If the action throws an exception, the temp file is - left behind, which allows for resuming. -} @@ -394,7 +397,7 @@ withTmp :: Key -> (FilePath -> Annex a) -> Annex a withTmp key action = do tmp <- prepTmp key res <- action tmp - liftIO $ nukeFile tmp + pruneTmpWorkDirBefore tmp (liftIO . nukeFile) return res {- Checks that there is disk space available to store a given key, @@ -429,16 +432,17 @@ checkDiskSpace' need destdir key alreadythere samefilesystem = ifM (Annex.getSta let delta = need + reserve - have - alreadythere + inprogress let ok = delta <= 0 unless ok $ - needmorespace delta + warning $ needMoreDiskSpace delta return ok _ -> return True ) where dir = maybe (fromRepo gitAnnexDir) return destdir - needmorespace n = - warning $ "not enough free space, need " ++ - roughSize storageUnits True n ++ - " more" ++ forcemsg + +needMoreDiskSpace :: Integer -> String +needMoreDiskSpace n = "not enough free space, need " ++ + roughSize storageUnits True n ++ " more" ++ forcemsg + where forcemsg = " (use --force to override this check or adjust annex.diskreserve)" {- Moves a key's content into .git/annex/objects/ @@ -989,7 +993,8 @@ staleKeysPrune dirspec nottransferred = do let stale = contents `exclude` dups dir <- fromRepo dirspec - liftIO $ forM_ dups $ \t -> removeFile $ dir </> keyFile t + forM_ dups $ \k -> + pruneTmpWorkDirBefore (dir </> keyFile k) (liftIO . removeFile) if nottransferred then do @@ -998,6 +1003,43 @@ staleKeysPrune dirspec nottransferred = do return $ filter (`S.notMember` inprogress) stale else return stale +{- Prune the work dir associated with the specified content file, + - before performing an action that deletes the file, or moves it away. + - + - This preserves the invariant that the workdir never exists without + - the content file. + -} +pruneTmpWorkDirBefore :: FilePath -> (FilePath -> Annex a) -> Annex a +pruneTmpWorkDirBefore f action = do + let workdir = gitAnnexTmpWorkDir f + liftIO $ whenM (doesDirectoryExist workdir) $ + removeDirectoryRecursive workdir + action f + +{- Runs an action, passing it a temporary work directory where + - it can write files while receiving the content of a key. + - + - On exception, or when the action returns Nothing, + - the temporary work directory is left, so resumes can use it. + -} +withTmpWorkDir :: Key -> (FilePath -> Annex (Maybe a)) -> Annex (Maybe a) +withTmpWorkDir key action = do + -- Create the object file if it does not exist. This way, + -- staleKeysPrune only has to look for object files, and can + -- clean up gitAnnexTmpWorkDir for those it finds. + obj <- prepTmp key + unlessM (liftIO $ doesFileExist obj) $ do + liftIO $ writeFile obj "" + setAnnexFilePerm obj + let tmpdir = gitAnnexTmpWorkDir obj + liftIO $ createDirectoryIfMissing True tmpdir + setAnnexDirPerm tmpdir + res <- action tmpdir + case res of + Just _ -> liftIO $ removeDirectoryRecursive tmpdir + Nothing -> noop + return res + {- Finds items in the first, smaller list, that are not - present in the second, larger list. - diff --git a/Annex/Locations.hs b/Annex/Locations.hs index f86dfc6f4..bb45d8a3e 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -1,6 +1,6 @@ {- git-annex file locations - - - Copyright 2010-2015 Joey Hess <id@joeyh.name> + - Copyright 2010-2017 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -27,6 +27,7 @@ module Annex.Locations ( gitAnnexTmpMiscDir, gitAnnexTmpObjectDir, gitAnnexTmpObjectLocation, + gitAnnexTmpWorkDir, gitAnnexBadDir, gitAnnexBadLocation, gitAnnexUnusedLog, @@ -251,6 +252,19 @@ gitAnnexTmpObjectDir r = addTrailingPathSeparator $ gitAnnexDir r </> "tmp" gitAnnexTmpObjectLocation :: Key -> Git.Repo -> FilePath gitAnnexTmpObjectLocation key r = gitAnnexTmpObjectDir r </> keyFile key +{- Given a temp file such as gitAnnexTmpObjectLocation, makes a name for a + - subdirectory in the same location, that can be used as a work area + - when receiving the key's content. + - + - There are ordering requirements for creating these directories; + - use Annex.Content.withTmpWorkDir to set them up. + -} +gitAnnexTmpWorkDir :: FilePath -> FilePath +gitAnnexTmpWorkDir p = + let (dir, f) = splitFileName p + -- Using a prefix avoids name conflict with any other keys. + in dir </> "work." ++ f + {- .git/annex/bad/ is used for bad files found during fsck -} gitAnnexBadDir :: Git.Repo -> FilePath gitAnnexBadDir r = addTrailingPathSeparator $ gitAnnexDir r </> "bad" diff --git a/Annex/Notification.hs b/Annex/Notification.hs index 0501c0db7..f3d5006e3 100644 --- a/Annex/Notification.hs +++ b/Annex/Notification.hs @@ -5,12 +5,14 @@ - Licensed under the GNU GPL version 3 or higher. -} +{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-} {-# LANGUAGE CPP #-} module Annex.Notification (NotifyWitness, noNotification, notifyTransfer, notifyDrop) where import Annex.Common import Types.Transfer +import Utility.Url #ifdef WITH_DBUS_NOTIFICATIONS import qualified Annex import Types.DesktopNotify @@ -25,29 +27,40 @@ data NotifyWitness = NotifyWitness noNotification :: NotifyWitness noNotification = NotifyWitness +class Transferrable t where + descTransfrerrable :: t -> Maybe String + +instance Transferrable AssociatedFile where + descTransfrerrable (AssociatedFile af) = af + +instance Transferrable URLString where + descTransfrerrable = Just + {- Wrap around an action that performs a transfer, which may run multiple - attempts. Displays notification when supported and when the user asked - for it. -} -notifyTransfer :: Direction -> AssociatedFile -> (NotifyWitness -> Annex Bool) -> Annex Bool -notifyTransfer _ (AssociatedFile Nothing) a = a NotifyWitness +notifyTransfer :: Transferrable t => Observable v => Direction -> t -> (NotifyWitness -> Annex v) -> Annex v +notifyTransfer direction t a = case descTransfrerrable t of + Nothing -> a NotifyWitness + Just desc -> do #ifdef WITH_DBUS_NOTIFICATIONS -notifyTransfer direction (AssociatedFile (Just f)) a = do - wanted <- Annex.getState Annex.desktopnotify - if (notifyStart wanted || notifyFinish wanted) - then do - client <- liftIO DBus.Client.connectSession - startnotification <- liftIO $ if notifyStart wanted - then Just <$> Notify.notify client (startedTransferNote direction f) - else pure Nothing - ok <- a NotifyWitness - when (notifyFinish wanted) $ liftIO $ void $ maybe - (Notify.notify client $ finishedTransferNote ok direction f) - (\n -> Notify.replace client n $ finishedTransferNote ok direction f) - startnotification - return ok - else a NotifyWitness + wanted <- Annex.getState Annex.desktopnotify + if (notifyStart wanted || notifyFinish wanted) + then do + client <- liftIO DBus.Client.connectSession + startnotification <- liftIO $ if notifyStart wanted + then Just <$> Notify.notify client (startedTransferNote direction desc) + else pure Nothing + res <- a NotifyWitness + let ok = observeBool res + when (notifyFinish wanted) $ liftIO $ void $ maybe + (Notify.notify client $ finishedTransferNote ok direction desc) + (\n -> Notify.replace client n $ finishedTransferNote ok direction desc) + startnotification + return res + else a NotifyWitness #else -notifyTransfer _ (AssociatedFile (Just _)) a = a NotifyWitness + a NotifyWitness #endif notifyDrop :: AssociatedFile -> Bool -> Annex () @@ -63,13 +76,13 @@ notifyDrop (AssociatedFile (Just _)) _ = noop #endif #ifdef WITH_DBUS_NOTIFICATIONS -startedTransferNote :: Direction -> FilePath -> Notify.Note +startedTransferNote :: Direction -> String -> Notify.Note startedTransferNote Upload = mkNote Notify.Transfer Notify.Low iconUpload "Uploading" startedTransferNote Download = mkNote Notify.Transfer Notify.Low iconDownload "Downloading" -finishedTransferNote :: Bool -> Direction -> FilePath -> Notify.Note +finishedTransferNote :: Bool -> Direction -> String -> Notify.Note finishedTransferNote False Upload = mkNote Notify.TransferError Notify.Normal iconFailure "Failed to upload" finishedTransferNote False Download = mkNote Notify.TransferError Notify.Normal iconFailure @@ -79,7 +92,7 @@ finishedTransferNote True Upload = mkNote Notify.TransferComplete Notify.Low finishedTransferNote True Download = mkNote Notify.TransferComplete Notify.Low iconSuccess "Finished downloading" -droppedNote :: Bool -> FilePath -> Notify.Note +droppedNote :: Bool -> String -> Notify.Note droppedNote False = mkNote Notify.TransferError Notify.Normal iconFailure "Failed to drop" droppedNote True = mkNote Notify.TransferComplete Notify.Low iconSuccess diff --git a/Annex/Quvi.hs b/Annex/Quvi.hs deleted file mode 100644 index efc63ca9f..000000000 --- a/Annex/Quvi.hs +++ /dev/null @@ -1,33 +0,0 @@ -{- quvi options for git-annex - - - - Copyright 2013 Joey Hess <id@joeyh.name> - - - - Licensed under the GNU GPL version 3 or higher. - -} - -{-# LANGUAGE Rank2Types #-} - -module Annex.Quvi where - -import Annex.Common -import qualified Annex -import Utility.Quvi -import Utility.Url - -withQuviOptions :: forall a. Query a -> [QuviParams] -> URLString -> Annex a -withQuviOptions a ps url = do - v <- quviVersion - opts <- map Param . annexQuviOptions <$> Annex.getGitConfig - liftIO $ a v (concatMap (\mkp -> mkp v) ps ++ opts) url - -quviSupported :: URLString -> Annex Bool -quviSupported u = liftIO . flip supported u =<< quviVersion - -quviVersion :: Annex QuviVersion -quviVersion = go =<< Annex.getState Annex.quviversion - where - go (Just v) = return v - go Nothing = do - v <- liftIO probeVersion - Annex.changeState $ \s -> s { Annex.quviversion = Just v } - return v diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs index 3fcf1a1b9..ccb5409a7 100644 --- a/Annex/Transfer.hs +++ b/Annex/Transfer.hs @@ -5,7 +5,7 @@ - Licensed under the GNU GPL version 3 or higher. -} -{-# LANGUAGE CPP, FlexibleInstances, BangPatterns #-} +{-# LANGUAGE CPP, BangPatterns #-} module Annex.Transfer ( module X, @@ -27,7 +27,6 @@ import Annex.Perms import Utility.Metered import Annex.LockPool import Types.Key -import Types.Remote (Verification(..)) import qualified Types.Remote as Remote import Types.Concurrency @@ -35,23 +34,6 @@ import Control.Concurrent import qualified Data.Map.Strict as M import Data.Ord -class Observable a where - observeBool :: a -> Bool - observeFailure :: a - -instance Observable Bool where - observeBool = id - observeFailure = False - -instance Observable (Bool, Verification) where - observeBool = fst - observeFailure = (False, UnVerified) - -instance Observable (Either e Bool) where - observeBool (Left _) = False - observeBool (Right b) = b - observeFailure = Right False - upload :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v upload u key f d a _witness = guardHaveUUID u $ runTransfer (Transfer Upload u key) f d a diff --git a/Annex/YoutubeDl.hs b/Annex/YoutubeDl.hs new file mode 100644 index 000000000..d3803075d --- /dev/null +++ b/Annex/YoutubeDl.hs @@ -0,0 +1,142 @@ +{- youtube-dl integration for git-annex + - + - Copyright 2017 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.YoutubeDl where + +import Annex.Common +import qualified Annex +import Annex.Content +import Utility.Url (URLString) +import Utility.DiskFree +import Logs.Transfer + +-- Runs youtube-dl in a work directory, to download a single media file +-- from the url. Reutrns the path to the media file in the work directory. +-- +-- If youtube-dl fails without writing any files to the work directory, +-- or is not installed, returns Right Nothing. +-- +-- The work directory can contain files from a previous run of youtube-dl +-- and it will resume. It should not contain any other files though, +-- and youtube-dl needs to finish up with only one file in the directory +-- so we know which one it downloaded. +-- +-- (Note that we can't use --output to specifiy the file to download to, +-- due to <https://github.com/rg3/youtube-dl/issues/14864>) +youtubeDl :: URLString -> FilePath -> Annex (Either String (Maybe FilePath)) +youtubeDl url workdir = ifM (liftIO $ inPath "youtube-dl") + ( runcmd >>= \case + Right True -> do + fs <- liftIO $ filterM (doesFileExist) =<< dirContents workdir + case fs of + (f:[]) -> return (Right (Just f)) + [] -> return nofiles + _ -> return (toomanyfiles fs) + Right False -> do + fs <- liftIO $ filterM (doesFileExist) =<< dirContents workdir + if null fs + then return (Right Nothing) + else return (Left "youtube-dl download is incomplete. Run the command again to resume.") + Left msg -> return (Left msg) + , return (Right Nothing) + ) + where + nofiles = Left "youtube-dl did not put any media in its work directory, perhaps it's been configured to store files somewhere else?" + toomanyfiles fs = Left $ "youtube-dl downloaded multiple media files; git-annex is only able to deal with one per url: " ++ show fs + runcmd = youtubeDlMaxSize workdir >>= \case + Left msg -> return (Left msg) + Right maxsize -> do + quiet <- commandProgressDisabled + opts <- youtubeDlOpts $ dlopts ++ maxsize ++ + if quiet then [ Param "--quiet" ] else [] + ok <- liftIO $ boolSystem' "youtube-dl" opts $ + \p -> p { cwd = Just workdir } + return (Right ok) + dlopts = + [ Param url + -- To make youtube-dl only download one file when given a + -- page with a video and a playlist, download only the video. + , Param "--no-playlist" + -- And when given a page with only a playlist, download only + -- the first video on the playlist. (Assumes the video is + -- somewhat stable, but this is the only way to prevent + -- youtube-dl from downloading the whole playlist.) + , Param "--playlist-items", Param "0" + ] + +-- To honor annex.diskreserve, ask youtube-dl to not download too +-- large a media file. Factors in other downloads that are in progress, +-- and any files in the workdir that it may have partially downloaded +-- before. +youtubeDlMaxSize :: FilePath -> Annex (Either String [CommandParam]) +youtubeDlMaxSize workdir = ifM (Annex.getState Annex.force) + ( return $ Right [] + , liftIO (getDiskFree workdir) >>= \case + Just have -> do + inprogress <- sizeOfDownloadsInProgress (const True) + partial <- liftIO $ sum + <$> (mapM getFileSize =<< dirContents workdir) + reserve <- annexDiskReserve <$> Annex.getGitConfig + let maxsize = have - reserve - inprogress + partial + if maxsize > 0 + then return $ Right + [ Param "--max-filesize" + , Param (show maxsize) + ] + else return $ Left $ + needMoreDiskSpace $ + negate maxsize + 1024 + Nothing -> return $ Right [] + ) + +-- Download a media file to a destination, +youtubeDlTo :: Key -> URLString -> FilePath -> Annex Bool +youtubeDlTo key url dest = do + res <- withTmpWorkDir key $ \workdir -> do + dl <- youtubeDl url workdir + case dl of + Right (Just mediafile) -> do + liftIO $ renameFile mediafile dest + return (Just True) + Right Nothing -> return (Just False) + Left msg -> do + warning msg + return Nothing + return (fromMaybe False res) + +youtubeDlSupported :: URLString -> Annex Bool +youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url + +-- Check if youtube-dl can find media in an url. +youtubeDlCheck :: URLString -> Annex (Either String Bool) +youtubeDlCheck url = catchMsgIO $ do + opts <- youtubeDlOpts [ Param url, Param "--simulate" ] + liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing + +-- Ask youtube-dl for the filename of media in an url. +-- +-- (This is not always identical to the filename it uses when downloading.) +youtubeDlFileName :: URLString -> Annex (Either String FilePath) +youtubeDlFileName url = flip catchIO (pure . Left . show) $ do + -- Sometimes youtube-dl will fail with an ugly backtrace + -- (eg, http://bugs.debian.org/874321) + -- so catch stderr as well as stdout to avoid the user seeing it. + -- --no-warnings avoids warning messages that are output to stdout. + opts <- youtubeDlOpts + [ Param url + , Param "--get-filename" + , Param "--no-warnings" + ] + (output, ok) <- liftIO $ processTranscript "youtube-dl" (toCommand opts) Nothing + return $ case (ok, lines output) of + (True, (f:_)) | not (null f) -> Right f + _ -> Left "no media in url" + +youtubeDlOpts :: [CommandParam] -> Annex [CommandParam] +youtubeDlOpts addopts = do + opts <- map Param . annexYoutubeDlOptions <$> Annex.getGitConfig + return (opts ++ addopts) diff --git a/BuildInfo.hs b/BuildInfo.hs index 47dacc78b..79253ee4f 100644 --- a/BuildInfo.hs +++ b/BuildInfo.hs @@ -81,7 +81,6 @@ buildFlags = filter (not . null) -- Always enabled now, but users may be used to seeing these flags -- listed. , "Feeds" - , "Quvi" ] -- Not a complete list, let alone a listing transitive deps, but only @@ -1,3 +1,17 @@ +git-annex (6.20171125) UNRELEASED; urgency=medium + + * Use youtube-dl rather than quvi to download media from web pages, + since quvi is not being actively developed and youtube-dl supports + many more sites. + * addurl --relaxed got slower, since youtube-dl has to hit the network + to check for embedded media. If you relied on --relaxed not hitting the + network for speed reasons, using --relaxed --raw will get the old level + of speed, but can't be used for urls with embedded videos. + * importfeed now downloads things linked to by feeds, even when they are + not media files. + + -- Joey Hess <id@joeyh.name> Tue, 28 Nov 2017 13:48:44 -0400 + git-annex (6.20171124) unstable; urgency=medium * Display progress meter when uploading a key without size information, diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 866bfc463..0e937dc69 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2011-2014 Joey Hess <id@joeyh.name> + - Copyright 2011-2017 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -21,6 +21,7 @@ import Annex.Content import Annex.Ingest import Annex.CheckIgnore import Annex.UUID +import Annex.YoutubeDl import Logs.Web import Types.KeySource import Types.UrlContents @@ -28,9 +29,8 @@ import Annex.FileMatcher import Logs.Location import Utility.Metered import Utility.FileSystemEncoding +import Utility.HtmlDetect import qualified Annex.Transfer as Transfer -import Annex.Quvi -import qualified Utility.Quvi as Quvi cmd :: Command cmd = notBareRepo $ withGlobalOptions [jobsOption, jsonOption, jsonProgressOption] $ @@ -39,23 +39,23 @@ cmd = notBareRepo $ withGlobalOptions [jobsOption, jsonOption, jsonProgressOptio data AddUrlOptions = AddUrlOptions { addUrls :: CmdParams - , fileOption :: Maybe FilePath , pathdepthOption :: Maybe Int , prefixOption :: Maybe String , suffixOption :: Maybe String - , relaxedOption :: Bool - , rawOption :: Bool + , downloadOptions :: DownloadOptions , batchOption :: BatchMode , batchFilesOption :: Bool } +data DownloadOptions = DownloadOptions + { relaxedOption :: Bool + , rawOption :: Bool + , fileOption :: Maybe FilePath + } + optParser :: CmdParamsDesc -> Parser AddUrlOptions optParser desc = AddUrlOptions <$> cmdParams desc - <*> optional (strOption - ( long "file" <> metavar paramFile - <> help "specify what file the url is added to" - )) <*> optional (option auto ( long "pathdepth" <> metavar paramNumber <> help "number of url path components to use in filename" @@ -68,25 +68,29 @@ optParser desc = AddUrlOptions ( long "suffix" <> metavar paramValue <> help "add a suffix to the filename" )) - <*> parseRelaxedOption - <*> parseRawOption + <*> parseDownloadOptions True <*> parseBatchOption <*> switch ( long "with-files" <> help "parse batch mode lines of the form \"$url $file\"" ) -parseRelaxedOption :: Parser Bool -parseRelaxedOption = switch - ( long "relaxed" - <> help "skip size check" - ) - -parseRawOption :: Parser Bool -parseRawOption = switch - ( long "raw" - <> help "disable special handling for torrents, quvi, etc" - ) +parseDownloadOptions :: Bool -> Parser DownloadOptions +parseDownloadOptions withfileoption = DownloadOptions + <$> switch + ( long "relaxed" + <> help "skip size check" + ) + <*> switch + ( long "raw" + <> help "disable special handling for torrents, youtube-dl, etc" + ) + <*> if withfileoption + then optional (strOption + ( long "file" <> metavar paramFile + <> help "specify what file the url is added to" + )) + else pure Nothing seek :: AddUrlOptions -> CommandSeek seek o = allowConcurrentOutput $ do @@ -97,7 +101,7 @@ seek o = allowConcurrentOutput $ do where go (o', u) = do r <- Remote.claimingUrl u - if Remote.uuid r == webUUID || rawOption o' + if Remote.uuid r == webUUID || rawOption (downloadOptions o') then void $ commandAction $ startWeb o' u else checkUrl r o' u @@ -107,13 +111,13 @@ parseBatchInput o s let (u, f) = separate (== ' ') s in if null u || null f then Left ("parsed empty url or filename in input: " ++ s) - else Right (o { fileOption = Just f }, u) + else Right (o { downloadOptions = (downloadOptions o) { fileOption = Just f } }, u) | otherwise = Right (o, s) checkUrl :: Remote -> AddUrlOptions -> URLString -> Annex () checkUrl r o u = do pathmax <- liftIO $ fileNameLengthLimit "." - let deffile = fromMaybe (urlString2file u (pathdepthOption o) pathmax) (fileOption o) + let deffile = fromMaybe (urlString2file u (pathdepthOption o) pathmax) (fileOption (downloadOptions o)) go deffile =<< maybe (error $ "unable to checkUrl of " ++ Remote.name r) (tryNonAsync . flip id u) @@ -121,50 +125,50 @@ checkUrl r o u = do where go _ (Left e) = void $ commandAction $ do - showStart "addurl" u + showStart' "addurl" (Just u) warning (show e) next $ next $ return False go deffile (Right (UrlContents sz mf)) = do - let f = adjustFile o (fromMaybe (maybe deffile fromSafeFilePath mf) (fileOption o)) - void $ commandAction $ - startRemote r (relaxedOption o) f u sz + let f = adjustFile o (fromMaybe (maybe deffile fromSafeFilePath mf) (fileOption (downloadOptions o))) + void $ commandAction $ startRemote r o f u sz go deffile (Right (UrlMulti l)) - | isNothing (fileOption o) = + | isNothing (fileOption (downloadOptions o)) = forM_ l $ \(u', sz, f) -> do let f' = adjustFile o (deffile </> fromSafeFilePath f) void $ commandAction $ - startRemote r (relaxedOption o) f' u' sz + startRemote r o f' u' sz | otherwise = giveup $ unwords [ "That url contains multiple files according to the" , Remote.name r , " remote; cannot add it to a single file." ] -startRemote :: Remote -> Bool -> FilePath -> URLString -> Maybe Integer -> CommandStart -startRemote r relaxed file uri sz = do +startRemote :: Remote -> AddUrlOptions -> FilePath -> URLString -> Maybe Integer -> CommandStart +startRemote r o file uri sz = do pathmax <- liftIO $ fileNameLengthLimit "." let file' = joinPath $ map (truncateFilePath pathmax) $ splitDirectories file - showStart "addurl" file' + showStart' "addurl" (Just uri) showNote $ "from " ++ Remote.name r - next $ performRemote r relaxed uri file' sz + showDestinationFile file' + next $ performRemote r o uri file' sz -performRemote :: Remote -> Bool -> URLString -> FilePath -> Maybe Integer -> CommandPerform -performRemote r relaxed uri file sz = ifAnnexed file adduri geturi +performRemote :: Remote -> AddUrlOptions -> URLString -> FilePath -> Maybe Integer -> CommandPerform +performRemote r o uri file sz = ifAnnexed file adduri geturi where loguri = setDownloader uri OtherDownloader - adduri = addUrlChecked relaxed loguri (Remote.uuid r) checkexistssize + adduri = addUrlChecked o loguri file (Remote.uuid r) checkexistssize checkexistssize key = return $ case sz of - Nothing -> (True, True) - Just n -> (True, n == fromMaybe n (keySize key)) - geturi = next $ isJust <$> downloadRemoteFile r relaxed uri file sz + Nothing -> (True, True, uri) + Just n -> (True, n == fromMaybe n (keySize key), uri) + geturi = next $ isJust <$> downloadRemoteFile r (downloadOptions o) uri file sz -downloadRemoteFile :: Remote -> Bool -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key) -downloadRemoteFile r relaxed uri file sz = checkCanAdd file $ do +downloadRemoteFile :: Remote -> DownloadOptions -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key) +downloadRemoteFile r o uri file sz = checkCanAdd file $ do let urlkey = Backend.URL.fromUrl uri sz liftIO $ createDirectoryIfMissing True (parentDir file) - ifM (Annex.getState Annex.fast <||> pure relaxed) + ifM (Annex.getState Annex.fast <||> pure (relaxedOption o)) ( do - cleanup (Remote.uuid r) loguri file urlkey Nothing + addWorkTree (Remote.uuid r) loguri file urlkey Nothing return (Just urlkey) , do -- Set temporary url for the urlkey @@ -181,24 +185,18 @@ downloadRemoteFile r relaxed uri file sz = checkCanAdd file $ do where loguri = setDownloader uri OtherDownloader -startWeb :: AddUrlOptions -> String -> CommandStart -startWeb o s = go $ fromMaybe bad $ parseURI urlstring +startWeb :: AddUrlOptions -> URLString -> CommandStart +startWeb o urlstring = go $ fromMaybe bad $ parseURI urlstring where - (urlstring, downloader) = getDownloader s bad = fromMaybe (giveup $ "bad url " ++ urlstring) $ Url.parseURIRelaxed $ urlstring - go url = case downloader of - QuviDownloader -> usequvi - _ -> ifM (quviSupported urlstring) - ( usequvi - , regulardownload url - ) - regulardownload url = do + go url = do + showStart' "addurl" (Just urlstring) pathmax <- liftIO $ fileNameLengthLimit "." - urlinfo <- if relaxedOption o + urlinfo <- if relaxedOption (downloadOptions o) then pure Url.assumeUrlExists else Url.withUrlOptions (Url.getUrlInfo urlstring) - file <- adjustFile o <$> case fileOption o of + file <- adjustFile o <$> case fileOption (downloadOptions o) of Just f -> pure f Nothing -> case Url.urlSuggestedFile urlinfo of Nothing -> pure $ url2file url (pathdepthOption o) pathmax @@ -209,79 +207,31 @@ startWeb o s = go $ fromMaybe bad $ parseURI urlstring ( pure $ url2file url (pathdepthOption o) pathmax , pure f ) - showStart "addurl" file - next $ performWeb (relaxedOption o) urlstring file urlinfo - badquvi = giveup $ "quvi does not know how to download url " ++ urlstring - usequvi = do - page <- fromMaybe badquvi - <$> withQuviOptions Quvi.forceQuery [Quvi.quiet, Quvi.httponly] urlstring - let link = fromMaybe badquvi $ headMaybe $ Quvi.pageLinks page - pathmax <- liftIO $ fileNameLengthLimit "." - let file = adjustFile o $ flip fromMaybe (fileOption o) $ - truncateFilePath pathmax $ sanitizeFilePath $ - Quvi.pageTitle page ++ "." ++ fromMaybe "m" (Quvi.linkSuffix link) - showStart "addurl" file - next $ performQuvi (relaxedOption o) urlstring (Quvi.linkUrl link) file - -performWeb :: Bool -> URLString -> FilePath -> Url.UrlInfo -> CommandPerform -performWeb relaxed url file urlinfo = ifAnnexed file addurl geturl - where - geturl = next $ isJust <$> addUrlFile relaxed url urlinfo file - addurl = addUrlChecked relaxed url webUUID $ \k -> return $ - (Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k) + next $ performWeb o urlstring file urlinfo -performQuvi :: Bool -> URLString -> URLString -> FilePath -> CommandPerform -performQuvi relaxed pageurl videourl file = ifAnnexed file addurl geturl - where - quviurl = setDownloader pageurl QuviDownloader - addurl key = next $ do - cleanup webUUID quviurl file key Nothing - return True - geturl = next $ isJust <$> addUrlFileQuvi relaxed quviurl videourl file - -addUrlFileQuvi :: Bool -> URLString -> URLString -> FilePath -> Annex (Maybe Key) -addUrlFileQuvi relaxed quviurl videourl file = checkCanAdd file $ do - let key = Backend.URL.fromUrl quviurl Nothing - ifM (pure relaxed <||> Annex.getState Annex.fast) - ( do - cleanup webUUID quviurl file key Nothing - return (Just key) - , do - {- Get the size, and use that to check - - disk space. However, the size info is not - - retained, because the size of a video stream - - might change and we want to be able to download - - it later. -} - urlinfo <- Url.withUrlOptions (Url.getUrlInfo videourl) - let sizedkey = addSizeUrlKey urlinfo key - checkDiskSpaceToGet sizedkey Nothing $ do - tmp <- fromRepo $ gitAnnexTmpObjectLocation key - showOutput - ok <- Transfer.notifyTransfer Transfer.Download afile $ - Transfer.download webUUID key afile Transfer.forwardRetry $ \p -> do - liftIO $ createDirectoryIfMissing True (parentDir tmp) - downloadUrl key p [videourl] tmp - if ok - then do - cleanup webUUID quviurl file key (Just tmp) - return (Just key) - else return Nothing - ) +performWeb :: AddUrlOptions -> URLString -> FilePath -> Url.UrlInfo -> CommandPerform +performWeb o url file urlinfo = ifAnnexed file addurl geturl where - afile = AssociatedFile (Just file) + geturl = next $ isJust <$> addUrlFile (downloadOptions o) url urlinfo file + addurl = addUrlChecked o url file webUUID $ \k -> + ifM (pure (not (rawOption (downloadOptions o))) <&&> youtubeDlSupported url) + ( return (True, True, setDownloader url YoutubeDownloader) + , return (Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k, url) + ) -addUrlChecked :: Bool -> URLString -> UUID -> (Key -> Annex (Bool, Bool)) -> Key -> CommandPerform -addUrlChecked relaxed url u checkexistssize key - | relaxed = do - setUrlPresent u key url - next $ return True - | otherwise = ifM ((elem url <$> getUrls key) <&&> (elem u <$> loggedLocations key)) - ( next $ return True -- nothing to do +{- Check that the url exists, and has the same size as the key, + - and add it as an url to the key. -} +addUrlChecked :: AddUrlOptions -> URLString -> FilePath -> UUID -> (Key -> Annex (Bool, Bool, URLString)) -> Key -> CommandPerform +addUrlChecked o url file u checkexistssize key = + ifM ((elem url <$> getUrls key) <&&> (elem u <$> loggedLocations key)) + ( do + showDestinationFile file + next $ return True , do - (exists, samesize) <- checkexistssize key - if exists && samesize + (exists, samesize, url') <- checkexistssize key + if exists && (samesize || relaxedOption (downloadOptions o)) then do - setUrlPresent u key url + setUrlPresent u key url' next $ return True else do warning $ "while adding a new url to an already annexed file, " ++ if exists @@ -290,64 +240,122 @@ addUrlChecked relaxed url u checkexistssize key stop ) -addUrlFile :: Bool -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) -addUrlFile relaxed url urlinfo file = checkCanAdd file $ do - liftIO $ createDirectoryIfMissing True (parentDir file) - ifM (Annex.getState Annex.fast <||> pure relaxed) - ( nodownload url urlinfo file - , downloadWeb url urlinfo file +{- Downloads an url (except in fast or relaxed mode) and adds it to the + - repository, normally at the specified FilePath. + - But, if youtube-dl supports the url, it will be written to a + - different file, based on the title of the media. Unless the user + - specified fileOption, which then forces using the FilePath. + -} +addUrlFile :: DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) +addUrlFile o url urlinfo file = + ifM (Annex.getState Annex.fast <||> pure (relaxedOption o)) + ( nodownloadWeb o url urlinfo file + , downloadWeb o url urlinfo file ) -downloadWeb :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) -downloadWeb url urlinfo file = do - let dummykey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing - let downloader f p = do +downloadWeb :: DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) +downloadWeb o url urlinfo file = + go =<< downloadWith' downloader urlkey webUUID url (AssociatedFile (Just file)) + where + urlkey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing + downloader f p = do showOutput - downloadUrl dummykey p [url] f - showAction $ "downloading " ++ url ++ " " - downloadWith downloader dummykey webUUID url file + downloadUrl urlkey p [url] f + go Nothing = return Nothing + -- If we downloaded a html file, try to use youtube-dl to + -- extract embedded media. + go (Just tmp) = ifM (pure (not (rawOption o)) <&&> liftIO (isHtml <$> readFile tmp)) + ( tryyoutubedl tmp + , normalfinish tmp + ) + normalfinish tmp = checkCanAdd file $ do + showDestinationFile file + liftIO $ createDirectoryIfMissing True (parentDir file) + finishDownloadWith tmp webUUID url file + tryyoutubedl tmp = withTmpWorkDir mediakey $ \workdir -> + Transfer.notifyTransfer Transfer.Download url $ + Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p -> do + dl <- youtubeDl url workdir + case dl of + Right (Just mediafile) -> do + pruneTmpWorkDirBefore tmp (liftIO . nukeFile) + let dest = if isJust (fileOption o) + then file + else takeFileName mediafile + checkCanAdd dest $ do + showDestinationFile dest + addWorkTree webUUID mediaurl dest mediakey (Just mediafile) + return $ Just mediakey + Right Nothing -> normalfinish tmp + Left msg -> do + warning msg + return Nothing + where + mediaurl = setDownloader url YoutubeDownloader + mediakey = Backend.URL.fromUrl mediaurl Nothing + +showDestinationFile :: FilePath -> Annex () +showDestinationFile file = do + showNote ("to " ++ file) + maybeShowJSON $ JSONChunk [("file", file)] {- The Key should be a dummy key, based on the URL, which is used - for this download, before we can examine the file and find its real key. - For resuming downloads to work, the dummy key for a given url should be - - stable. -} + - stable. For disk space checking to work, the dummy key should have + - the size of the url already set. + - + - Downloads the url, sets up the worktree file, and returns the + - real key. + -} downloadWith :: (FilePath -> MeterUpdate -> Annex Bool) -> Key -> UUID -> URLString -> FilePath -> Annex (Maybe Key) downloadWith downloader dummykey u url file = - checkDiskSpaceToGet dummykey Nothing $ do - tmp <- fromRepo $ gitAnnexTmpObjectLocation dummykey - ifM (runtransfer tmp) - ( do - backend <- chooseBackend file - let source = KeySource - { keyFilename = file - , contentLocation = tmp - , inodeCache = Nothing - } - k <- genKey source backend - case k of - Nothing -> return Nothing - Just (key, _) -> do - cleanup u url file key (Just tmp) - return (Just key) - , return Nothing - ) + go =<< downloadWith' downloader dummykey u url afile where - runtransfer tmp = Transfer.notifyTransfer Transfer.Download afile $ - Transfer.download u dummykey afile Transfer.forwardRetry $ \p -> do - liftIO $ createDirectoryIfMissing True (parentDir tmp) - downloader tmp p afile = AssociatedFile (Just file) + go Nothing = return Nothing + go (Just tmp) = finishDownloadWith tmp u url file + +{- Like downloadWith, but leaves the dummy key content in + - the returned location. -} +downloadWith' :: (FilePath -> MeterUpdate -> Annex Bool) -> Key -> UUID -> URLString -> AssociatedFile -> Annex (Maybe FilePath) +downloadWith' downloader dummykey u url afile = + checkDiskSpaceToGet dummykey Nothing $ do + tmp <- fromRepo $ gitAnnexTmpObjectLocation dummykey + ok <- Transfer.notifyTransfer Transfer.Download url $ + Transfer.download u dummykey afile Transfer.forwardRetry $ \p -> do + liftIO $ createDirectoryIfMissing True (parentDir tmp) + downloader tmp p + if ok + then return (Just tmp) + else return Nothing + +finishDownloadWith :: FilePath -> UUID -> URLString -> FilePath -> Annex (Maybe Key) +finishDownloadWith tmp u url file = do + backend <- chooseBackend file + let source = KeySource + { keyFilename = file + , contentLocation = tmp + , inodeCache = Nothing + } + k <- genKey source backend + case k of + Nothing -> return Nothing + Just (key, _) -> do + addWorkTree u url file key (Just tmp) + return (Just key) {- Adds the url size to the Key. -} addSizeUrlKey :: Url.UrlInfo -> Key -> Key addSizeUrlKey urlinfo key = key { keySize = Url.urlSize urlinfo } -cleanup :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex () -cleanup u url file key mtmp = case mtmp of +{- Adds worktree file to the repository. -} +addWorkTree :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex () +addWorkTree u url file key mtmp = case mtmp of Nothing -> go Just tmp -> do -- Move to final location for large file check. - liftIO $ renameFile tmp file + pruneTmpWorkDirBefore tmp (\_ -> liftIO $ renameFile tmp file) largematcher <- largeFilesMatcher large <- checkFileMatcher largematcher file if large @@ -366,18 +374,36 @@ cleanup u url file key mtmp = case mtmp of ( do when (isJust mtmp) $ logStatus key InfoPresent - , liftIO $ maybe noop nukeFile mtmp + , maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . nukeFile)) mtmp ) -nodownload :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) -nodownload url urlinfo file - | Url.urlExists urlinfo = do - let key = Backend.URL.fromUrl url (Url.urlSize urlinfo) - cleanup webUUID url file key Nothing - return (Just key) +nodownloadWeb :: DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) +nodownloadWeb o url urlinfo file + | Url.urlExists urlinfo = if rawOption o + then nomedia + else either (const nomedia) usemedia + =<< youtubeDlFileName url | otherwise = do warning $ "unable to access url: " ++ url return Nothing + where + nomedia = do + let key = Backend.URL.fromUrl url (Url.urlSize urlinfo) + nodownloadWeb' url key file + usemedia mediafile = do + let dest = if isJust (fileOption o) + then file + else takeFileName mediafile + let mediaurl = setDownloader url YoutubeDownloader + let mediakey = Backend.URL.fromUrl mediaurl Nothing + nodownloadWeb' mediaurl mediakey dest + +nodownloadWeb' :: URLString -> Key -> FilePath -> Annex (Maybe Key) +nodownloadWeb' url key file = checkCanAdd file $ do + showDestinationFile file + liftIO $ createDirectoryIfMissing True (parentDir file) + addWorkTree webUUID url file key Nothing + return (Just key) url2file :: URI -> Maybe Int -> Int -> FilePath url2file url pathdepth pathmax = case pathdepth of @@ -411,7 +437,7 @@ adjustFile o = addprefix . addsuffix checkCanAdd :: FilePath -> Annex (Maybe a) -> Annex (Maybe a) checkCanAdd file a = ifM (isJust <$> (liftIO $ catchMaybeIO $ getSymbolicLinkStatus file)) ( do - warning $ file ++ " already exists and is not annexed; not overwriting" + warning $ file ++ " already exists; not overwriting" return Nothing , ifM ((not <$> Annex.getState Annex.force) <&&> checkIgnored file) ( do diff --git a/Command/Adjust.hs b/Command/Adjust.hs index 204fd057a..0fef3f936 100644 --- a/Command/Adjust.hs +++ b/Command/Adjust.hs @@ -38,5 +38,5 @@ seek = commandAction . start start :: Adjustment -> CommandStart start adj = do checkVersionSupported - showStart "adjust" "" + showStart' "adjust" Nothing next $ next $ enterAdjustedBranch adj diff --git a/Command/Config.hs b/Command/Config.hs index 5da196044..47415999d 100644 --- a/Command/Config.hs +++ b/Command/Config.hs @@ -50,14 +50,14 @@ optParser _ = setconfig <|> getconfig <|> unsetconfig seek :: Action -> CommandSeek seek (SetConfig name val) = commandAction $ do allowMessages - showStart name val + showStart' name (Just val) next $ next $ do setGlobalConfig name val setConfig (ConfigKey name) val return True seek (UnsetConfig name) = commandAction $ do allowMessages - showStart name "unset" + showStart' name (Just "unset") next $ next $ do unsetGlobalConfig name unsetConfig (ConfigKey name) diff --git a/Command/Dead.hs b/Command/Dead.hs index 44cf7b7f6..385dd6fad 100644 --- a/Command/Dead.hs +++ b/Command/Dead.hs @@ -33,7 +33,7 @@ seek (DeadKeys ks) = seekActions $ pure $ map startKey ks startKey :: Key -> CommandStart startKey key = do - showStart "dead" (key2file key) + showStart' "dead" (Just $ key2file key) ls <- keyLocations key case ls of [] -> next $ performKey key diff --git a/Command/Describe.hs b/Command/Describe.hs index dc7a5d8f9..4b86a7b58 100644 --- a/Command/Describe.hs +++ b/Command/Describe.hs @@ -22,7 +22,7 @@ seek = withWords start start :: [String] -> CommandStart start (name:description) = do - showStart "describe" name + showStart' "describe" (Just name) u <- Remote.nameToUUID name next $ perform u $ unwords description start _ = giveup "Specify a repository and a description." diff --git a/Command/Direct.hs b/Command/Direct.hs index 817cedd53..20eeef726 100644 --- a/Command/Direct.hs +++ b/Command/Direct.hs @@ -31,7 +31,7 @@ start = ifM versionSupportsDirectMode perform :: CommandPerform perform = do - showStart "commit" "" + showStart' "commit" Nothing showOutput _ <- inRepo $ Git.Branch.commitCommand Git.Branch.ManualCommit [ Param "-a" @@ -65,6 +65,6 @@ perform = do cleanup :: CommandCleanup cleanup = do - showStart "direct" "" + showStart' "direct" Nothing setDirect True return True diff --git a/Command/Drop.hs b/Command/Drop.hs index b03e3e080..275714a65 100644 --- a/Command/Drop.hs +++ b/Command/Drop.hs @@ -89,12 +89,12 @@ startKeys o key = start' o key (AssociatedFile Nothing) startLocal :: AssociatedFile -> ActionItem -> NumCopies -> Key -> [VerifiedCopy] -> CommandStart startLocal afile ai numcopies key preverified = stopUnless (inAnnex key) $ do - showStart' "drop" key ai + showStartKey "drop" key ai next $ performLocal key afile numcopies preverified startRemote :: AssociatedFile -> ActionItem -> NumCopies -> Key -> Remote -> CommandStart startRemote afile ai numcopies key remote = do - showStart' ("drop " ++ Remote.name remote) key ai + showStartKey ("drop " ++ Remote.name remote) key ai next $ performRemote key afile numcopies remote performLocal :: Key -> AssociatedFile -> NumCopies -> [VerifiedCopy] -> CommandPerform diff --git a/Command/DropKey.hs b/Command/DropKey.hs index 65446ba06..f3f2333dd 100644 --- a/Command/DropKey.hs +++ b/Command/DropKey.hs @@ -42,7 +42,7 @@ seek o = do start :: Key -> CommandStart start key = do - showStart' "dropkey" key (mkActionItem key) + showStartKey "dropkey" key (mkActionItem key) next $ perform key perform :: Key -> CommandPerform diff --git a/Command/DropUnused.hs b/Command/DropUnused.hs index 840a8a472..c5a61d739 100644 --- a/Command/DropUnused.hs +++ b/Command/DropUnused.hs @@ -55,5 +55,5 @@ perform from numcopies key = case from of performOther :: (Key -> Git.Repo -> FilePath) -> Key -> CommandPerform performOther filespec key = do f <- fromRepo $ filespec key - liftIO $ nukeFile f + pruneTmpWorkDirBefore f (liftIO . nukeFile) next $ return True diff --git a/Command/EnableRemote.hs b/Command/EnableRemote.hs index fd830375a..d9993ebc9 100644 --- a/Command/EnableRemote.hs +++ b/Command/EnableRemote.hs @@ -55,7 +55,7 @@ start (name:rest) = go =<< filter matchingname <$> Annex.fromRepo Git.remotes startNormalRemote :: Git.RemoteName -> [String] -> Git.Repo -> CommandStart startNormalRemote name restparams r | null restparams = do - showStart "enableremote" name + showStart' "enableremote" (Just name) next $ next $ do setRemoteIgnore r False r' <- Remote.Git.configRead False r @@ -77,7 +77,7 @@ startSpecialRemote name config Nothing = do startSpecialRemote name config (Just (u, c)) = do let fullconfig = config `M.union` c t <- either giveup return (Annex.SpecialRemote.findType fullconfig) - showStart "enableremote" name + showStart' "enableremote" (Just name) gc <- maybe (liftIO dummyRemoteGitConfig) (return . Remote.gitconfig) =<< Remote.byUUID u diff --git a/Command/EnableTor.hs b/Command/EnableTor.hs index 7076a9a73..72fa50448 100644 --- a/Command/EnableTor.hs +++ b/Command/EnableTor.hs @@ -51,7 +51,7 @@ start os = do Nothing -> giveup "Need user-id parameter." Just userid -> go uuid userid else do - showStart "enable-tor" "" + showStart' "enable-tor" Nothing gitannex <- liftIO readProgramFile let ps = [Param (cmdname cmd), Param (show curruserid)] sucommand <- liftIO $ mkSuCommand gitannex ps diff --git a/Command/Expire.hs b/Command/Expire.hs index 551742304..28f90dfb5 100644 --- a/Command/Expire.hs +++ b/Command/Expire.hs @@ -59,12 +59,12 @@ start :: Expire -> Bool -> Log Activity -> M.Map UUID String -> UUID -> CommandS start (Expire expire) noact actlog descs u = case lastact of Just ent | notexpired ent -> checktrust (== DeadTrusted) $ do - showStart "unexpire" desc + showStart' "unexpire" (Just desc) showNote =<< whenactive unless noact $ trustSet u SemiTrusted _ -> checktrust (/= DeadTrusted) $ do - showStart "expire" desc + showStart' "expire" (Just desc) showNote =<< whenactive unless noact $ trustSet u DeadTrusted diff --git a/Command/Forget.hs b/Command/Forget.hs index d172cc693..40ac98d4b 100644 --- a/Command/Forget.hs +++ b/Command/Forget.hs @@ -34,7 +34,7 @@ seek = commandAction . start start :: ForgetOptions -> CommandStart start o = do - showStart "forget" "git-annex" + showStart' "forget" (Just "git-annex") c <- liftIO currentVectorClock let basets = addTransition c ForgetGitHistory noTransitions let ts = if dropDead o diff --git a/Command/FromKey.hs b/Command/FromKey.hs index c1e3a7965..1b276db99 100644 --- a/Command/FromKey.hs +++ b/Command/FromKey.hs @@ -41,7 +41,7 @@ start force (keyname, file) = do startMass :: CommandStart startMass = do - showStart "fromkey" "stdin" + showStart' "fromkey" (Just "stdin") next massAdd massAdd :: CommandPerform diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 514421e93..bc7a29f15 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -536,7 +536,7 @@ badContentRemote remote localcopy key = do runFsck :: Incremental -> ActionItem -> Key -> Annex Bool -> CommandStart runFsck inc ai key a = ifM (needFsck inc key) ( do - showStart' "fsck" key ai + showStartKey "fsck" key ai next $ do ok <- a when ok $ diff --git a/Command/Get.hs b/Command/Get.hs index a412b2cb3..a74ca253f 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -71,7 +71,7 @@ start' expensivecheck from key afile ai = onlyActionOn key $ go $ Command.Move.fromPerform src False key afile where go a = do - showStart' "get" key ai + showStartKey "get" key ai next a perform :: Key -> AssociatedFile -> CommandPerform diff --git a/Command/Group.hs b/Command/Group.hs index 65e062589..52b7e688c 100644 --- a/Command/Group.hs +++ b/Command/Group.hs @@ -24,7 +24,7 @@ seek = withWords start start :: [String] -> CommandStart start (name:g:[]) = do allowMessages - showStart "group" name + showStart' "group" (Just name) u <- Remote.nameToUUID name next $ setGroup u g start (name:[]) = do diff --git a/Command/GroupWanted.hs b/Command/GroupWanted.hs index 939b3030a..8dab02933 100644 --- a/Command/GroupWanted.hs +++ b/Command/GroupWanted.hs @@ -24,6 +24,6 @@ start :: [String] -> CommandStart start (g:[]) = next $ performGet groupPreferredContentMapRaw g start (g:expr:[]) = do allowMessages - showStart "groupwanted" g + showStart' "groupwanted" (Just g) next $ performSet groupPreferredContentSet expr g start _ = giveup "Specify a group." diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs index d2989f05b..a02d11824 100644 --- a/Command/ImportFeed.hs +++ b/Command/ImportFeed.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2013 Joey Hess <id@joeyh.name> + - Copyright 2013-2017 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -32,16 +32,16 @@ import Types.UrlContents import Logs.Web import qualified Utility.Format import Utility.Tmp -import Command.AddUrl (addUrlFile, downloadRemoteFile, parseRelaxedOption, parseRawOption) +import Command.AddUrl (addUrlFile, downloadRemoteFile, parseDownloadOptions, DownloadOptions(..)) import Annex.Perms import Annex.UUID import Backend.URL (fromUrl) -import Annex.Quvi -import qualified Utility.Quvi as Quvi -import Command.AddUrl (addUrlFileQuvi) +import Annex.Content +import Annex.YoutubeDl import Types.MetaData import Logs.MetaData import Annex.MetaData +import Command.AddUrl (addWorkTree) cmd :: Command cmd = notBareRepo $ @@ -51,8 +51,7 @@ cmd = notBareRepo $ data ImportFeedOptions = ImportFeedOptions { feedUrls :: CmdParams , templateOption :: Maybe String - , relaxedOption :: Bool - , rawOption :: Bool + , downloadOptions :: DownloadOptions } optParser :: CmdParamsDesc -> Parser ImportFeedOptions @@ -62,8 +61,7 @@ optParser desc = ImportFeedOptions ( long "template" <> metavar paramFormat <> help "template for filenames" )) - <*> parseRelaxedOption - <*> parseRawOption + <*> parseDownloadOptions False seek :: ImportFeedOptions -> CommandSeek seek o = do @@ -72,7 +70,7 @@ seek o = do start :: ImportFeedOptions -> Cache -> URLString -> CommandStart start opts cache url = do - showStart "importfeed" url + showStart' "importfeed" (Just url) next $ perform opts cache url perform :: ImportFeedOptions -> Cache -> URLString -> CommandPerform @@ -101,7 +99,7 @@ data ToDownload = ToDownload , location :: DownloadLocation } -data DownloadLocation = Enclosure URLString | QuviLink URLString +data DownloadLocation = Enclosure URLString | MediaLink URLString type ItemId = String @@ -141,14 +139,10 @@ findDownloads u = go =<< downloadFeed u Just (enclosureurl, _, _) -> return $ Just $ ToDownload f u i $ Enclosure $ fromFeed enclosureurl - Nothing -> mkquvi f i - mkquvi f i = case getItemLink i of - Just link -> ifM (quviSupported $ fromFeed link) - ( return $ Just $ ToDownload f u i $ QuviLink $ - fromFeed link - , return Nothing - ) - Nothing -> return Nothing + Nothing -> case getItemLink i of + Just link -> return $ Just $ ToDownload f u i $ + MediaLink $ fromFeed link + Nothing -> return Nothing {- Feeds change, so a feed download cannot be resumed. -} downloadFeed :: URLString -> Annex (Maybe Feed) @@ -169,12 +163,19 @@ performDownload opts cache todownload = case location todownload of Enclosure url -> checkknown url $ rundownload url (takeWhile (/= '?') $ takeExtension url) $ \f -> do r <- Remote.claimingUrl url - if Remote.uuid r == webUUID || rawOption opts + if Remote.uuid r == webUUID || rawOption (downloadOptions opts) then do - urlinfo <- if relaxedOption opts + urlinfo <- if relaxedOption (downloadOptions opts) then pure Url.assumeUrlExists else Url.withUrlOptions (Url.getUrlInfo url) - maybeToList <$> addUrlFile (relaxedOption opts) url urlinfo f + let dlopts = (downloadOptions opts) + -- force using the filename + -- chosen here + { fileOption = Just f + -- don't use youtube-dl + , rawOption = True + } + maybeToList <$> addUrlFile dlopts url urlinfo f else do res <- tryNonAsync $ maybe (error $ "unable to checkUrl of " ++ Remote.name r) @@ -184,27 +185,26 @@ performDownload opts cache todownload = case location todownload of Left _ -> return [] Right (UrlContents sz _) -> maybeToList <$> - downloadRemoteFile r (relaxedOption opts) url f sz + downloadRemoteFile r (downloadOptions opts) url f sz Right (UrlMulti l) -> do kl <- forM l $ \(url', sz, subf) -> - downloadRemoteFile r (relaxedOption opts) url' (f </> fromSafeFilePath subf) sz + downloadRemoteFile r (downloadOptions opts) url' (f </> fromSafeFilePath subf) sz return $ if all isJust kl then catMaybes kl else [] - QuviLink pageurl -> do - let quviurl = setDownloader pageurl QuviDownloader - checkknown quviurl $ do - mp <- withQuviOptions Quvi.query [Quvi.quiet, Quvi.httponly] pageurl - case mp of - Nothing -> return False - Just page -> case headMaybe $ Quvi.pageLinks page of - Nothing -> return False - Just link -> do - let videourl = Quvi.linkUrl link - checkknown videourl $ - rundownload videourl ("." ++ fromMaybe "m" (Quvi.linkSuffix link)) $ \f -> - maybeToList <$> addUrlFileQuvi (relaxedOption opts) quviurl videourl f + MediaLink linkurl -> do + let mediaurl = setDownloader linkurl YoutubeDownloader + let mediakey = Backend.URL.fromUrl mediaurl Nothing + -- Old versions of git-annex that used quvi might have + -- used the quviurl for this, so check i/f it's known + -- to avoid adding it a second time. + let quviurl = setDownloader linkurl QuviDownloader + checkknown mediaurl $ checkknown quviurl $ + ifM (Annex.getState Annex.fast <||> pure (relaxedOption (downloadOptions opts))) + ( addmediafast linkurl mediaurl mediakey + , downloadmedia linkurl mediaurl mediakey + ) where forced = Annex.getState Annex.force @@ -265,6 +265,42 @@ performDownload opts cache todownload = case location todownload of ( return Nothing , tryanother ) + + downloadmedia linkurl mediaurl mediakey + | rawOption (downloadOptions opts) = downloadlink + | otherwise = do + r <- withTmpWorkDir mediakey $ \workdir -> do + dl <- youtubeDl linkurl workdir + case dl of + Right (Just mediafile) -> do + let ext = case takeExtension mediafile of + [] -> ".m" + s -> s + ok <- rundownload linkurl ext $ \f -> do + addWorkTree webUUID mediaurl f mediakey (Just mediafile) + return [mediakey] + return (Just ok) + -- youtude-dl didn't support it, so + -- download it as if the link were + -- an enclosure. + Right Nothing -> Just <$> downloadlink + Left msg -> do + warning msg + return Nothing + return (fromMaybe False r) + where + downloadlink = performDownload opts cache todownload + { location = Enclosure linkurl } + + addmediafast linkurl mediaurl mediakey = + ifM (pure (not (rawOption (downloadOptions opts))) + <&&> youtubeDlSupported linkurl) + ( rundownload linkurl ".m" $ \f -> do + addWorkTree webUUID mediaurl f mediakey Nothing + return [mediakey] + , performDownload opts cache todownload + { location = Enclosure linkurl } + ) defaultTemplate :: String defaultTemplate = "${feedtitle}/${itemtitle}${extension}" diff --git a/Command/Indirect.hs b/Command/Indirect.hs index 862c6e00e..825b82004 100644 --- a/Command/Indirect.hs +++ b/Command/Indirect.hs @@ -42,7 +42,7 @@ start = ifM isDirect perform :: CommandPerform perform = do - showStart "commit" "" + showStart' "commit" Nothing whenM stageDirect $ do showOutput void $ inRepo $ Git.Branch.commitCommand Git.Branch.ManualCommit @@ -100,6 +100,6 @@ perform = do cleanup :: CommandCleanup cleanup = do - showStart "indirect" "" + showStart' "indirect" Nothing showEndOk return True diff --git a/Command/Init.hs b/Command/Init.hs index 3c38c0f8a..e7f57c29c 100644 --- a/Command/Init.hs +++ b/Command/Init.hs @@ -40,7 +40,7 @@ seek = commandAction . start start :: InitOptions -> CommandStart start os = do - showStart "init" (initDesc os) + showStart' "init" (Just $ initDesc os) next $ perform os perform :: InitOptions -> CommandPerform diff --git a/Command/InitRemote.hs b/Command/InitRemote.hs index d82dc366c..c52ca4c56 100644 --- a/Command/InitRemote.hs +++ b/Command/InitRemote.hs @@ -38,7 +38,7 @@ start (name:ws) = ifM (isJust <$> findExisting name) let c = newConfig name t <- either giveup return (findType config) - showStart "initremote" name + showStart' "initremote" (Just name) next $ perform t name $ M.union config c ) ) diff --git a/Command/Map.hs b/Command/Map.hs index ae568f8cc..9ae73d898 100644 --- a/Command/Map.hs +++ b/Command/Map.hs @@ -189,7 +189,7 @@ same a b {- reads the config of a remote, with progress display -} scan :: Git.Repo -> Annex Git.Repo scan r = do - showStart "map" $ Git.repoDescribe r + showStart' "map" (Just $ Git.repoDescribe r) v <- tryScan r case v of Just r' -> do diff --git a/Command/Merge.hs b/Command/Merge.hs index 4f99093ab..66b519973 100644 --- a/Command/Merge.hs +++ b/Command/Merge.hs @@ -23,7 +23,7 @@ seek _ = do mergeBranch :: CommandStart mergeBranch = do - showStart "merge" "git-annex" + showStart' "merge" (Just "git-annex") next $ do Annex.Branch.update -- commit explicitly, in case no remote branches were merged diff --git a/Command/MetaData.hs b/Command/MetaData.hs index fd5fd0838..9fba1097a 100644 --- a/Command/MetaData.hs +++ b/Command/MetaData.hs @@ -100,7 +100,7 @@ startKeys c o k ai = case getSet o of putStrLn . fromMetaValue stop _ -> do - showStart' "metadata" k ai + showStartKey "metadata" k ai next $ perform c o k perform :: VectorClock -> MetaDataOptions -> Key -> CommandPerform @@ -164,7 +164,7 @@ startBatch (i, (MetaData m)) = case i of Right k -> go k (mkActionItem k) where go k ai = do - showStart' "metadata" k ai + showStartKey "metadata" k ai let o = MetaDataOptions { forFiles = [] , getSet = if MetaData m == emptyMetaData diff --git a/Command/Move.hs b/Command/Move.hs index 04e6aa384..63b5fb8b0 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -87,7 +87,7 @@ start' o move afile key ai = onlyActionOn key $ toHereStart move afile key ai showMoveAction :: Bool -> Key -> ActionItem -> Annex () -showMoveAction move = showStart' (if move then "move" else "copy") +showMoveAction move = showStartKey (if move then "move" else "copy") {- Moves (or copies) the content of an annexed file to a remote. - diff --git a/Command/Multicast.hs b/Command/Multicast.hs index 7b0d54e8e..9a518a18f 100644 --- a/Command/Multicast.hs +++ b/Command/Multicast.hs @@ -78,7 +78,7 @@ seek (MultiCastOptions Receive _ _) = giveup "Cannot specify list of files with genAddress :: CommandStart genAddress = do - showStart "gen-address" "" + showStart' "gen-address" Nothing k <- uftpKey (s, ok) <- case k of KeyContainer s -> liftIO $ genkey (Param s) @@ -130,7 +130,7 @@ send ups fs = withTmpFile "send" $ \t h -> do whenM isDirect $ giveup "Sorry, multicast send cannot be done from a direct mode repository." - showStart "generating file list" "" + showStart' "generating file list" Nothing fs' <- seekHelper LsFiles.inRepo =<< workTreeItems fs matcher <- Limit.getMatcher let addlist f o = whenM (matcher $ MatchingFile $ FileInfo f f) $ @@ -143,7 +143,7 @@ send ups fs = withTmpFile "send" $ \t h -> do liftIO $ hClose h showEndOk - showStart "sending files" "" + showStart' "sending files" Nothing showOutput serverkey <- uftpKey u <- getUUID @@ -169,7 +169,7 @@ send ups fs = withTmpFile "send" $ \t h -> do receive :: [CommandParam] -> CommandStart receive ups = do - showStart "receiving multicast files" "" + showStart' "receiving multicast files" Nothing showNote "Will continue to run until stopped by ctrl-c" showOutput diff --git a/Command/NumCopies.hs b/Command/NumCopies.hs index 9e467da7a..d10f70063 100644 --- a/Command/NumCopies.hs +++ b/Command/NumCopies.hs @@ -48,7 +48,7 @@ startGet = next $ next $ do startSet :: Int -> CommandStart startSet n = do allowMessages - showStart "numcopies" (show n) + showStart' "numcopies" (Just $ show n) next $ next $ do setGlobalNumCopies $ NumCopies n return True diff --git a/Command/P2P.hs b/Command/P2P.hs index 58b5c3bd7..40a49b49f 100644 --- a/Command/P2P.hs +++ b/Command/P2P.hs @@ -97,7 +97,7 @@ genAddresses addrs = do -- Address is read from stdin, to avoid leaking it in shell history. linkRemote :: RemoteName -> CommandStart linkRemote remotename = do - showStart "p2p link" remotename + showStart' "p2p link" (Just remotename) next $ next promptaddr where promptaddr = do @@ -123,7 +123,7 @@ linkRemote remotename = do startPairing :: RemoteName -> [P2PAddress] -> CommandStart startPairing _ [] = giveup "No P2P networks are currrently available." startPairing remotename addrs = do - showStart "p2p pair" remotename + showStart' "p2p pair" (Just remotename) ifM (liftIO Wormhole.isInstalled) ( next $ performPairing remotename addrs , giveup "Magic Wormhole is not installed, and is needed for pairing. Install it from your distribution or from https://github.com/warner/magic-wormhole/" diff --git a/Command/RegisterUrl.hs b/Command/RegisterUrl.hs index 008e6436c..ef73e6728 100644 --- a/Command/RegisterUrl.hs +++ b/Command/RegisterUrl.hs @@ -27,10 +27,10 @@ seek = withWords start start :: [String] -> CommandStart start (keyname:url:[]) = do let key = mkKey keyname - showStart "registerurl" url + showStart' "registerurl" (Just url) next $ perform key url start [] = do - showStart "registerurl" "stdin" + showStart' "registerurl" (Just "stdin") next massAdd start _ = giveup "specify a key and an url" diff --git a/Command/Reinit.hs b/Command/Reinit.hs index 25001db43..6defa4e95 100644 --- a/Command/Reinit.hs +++ b/Command/Reinit.hs @@ -25,7 +25,7 @@ seek = withWords start start :: [String] -> CommandStart start ws = do - showStart "reinit" s + showStart' "reinit" (Just s) next $ perform s where s = unwords ws diff --git a/Command/ResolveMerge.hs b/Command/ResolveMerge.hs index 0ba6efb36..df72e24d4 100644 --- a/Command/ResolveMerge.hs +++ b/Command/ResolveMerge.hs @@ -23,7 +23,7 @@ seek = withNothing start start :: CommandStart start = do - showStart "resolvemerge" "" + showStart' "resolvemerge" Nothing us <- fromMaybe nobranch <$> inRepo Git.Branch.current d <- fromRepo Git.localGitDir let merge_head = d </> "MERGE_HEAD" diff --git a/Command/Schedule.hs b/Command/Schedule.hs index 5814d99f1..b39e652b2 100644 --- a/Command/Schedule.hs +++ b/Command/Schedule.hs @@ -28,7 +28,7 @@ start = parse parse (name:[]) = go name performGet parse (name:expr:[]) = go name $ \uuid -> do allowMessages - showStart "schedule" name + showStart' "schedule" (Just name) performSet expr uuid parse _ = giveup "Specify a repository." diff --git a/Command/SetPresentKey.hs b/Command/SetPresentKey.hs index da2a6fa3d..6e7075e8c 100644 --- a/Command/SetPresentKey.hs +++ b/Command/SetPresentKey.hs @@ -23,7 +23,7 @@ seek = withWords start start :: [String] -> CommandStart start (ks:us:vs:[]) = do - showStart' "setpresentkey" k (mkActionItem k) + showStartKey "setpresentkey" k (mkActionItem k) next $ perform k (toUUID us) s where k = fromMaybe (giveup "bad key") (file2key ks) diff --git a/Command/Sync.hs b/Command/Sync.hs index b2d0bd275..f63260ed4 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -299,7 +299,7 @@ syncRemotes' ps available = commit :: SyncOptions -> CommandStart commit o = stopUnless shouldcommit $ next $ next $ do commitmessage <- maybe commitMsg return (messageOption o) - showStart "commit" "" + showStart' "commit" Nothing Annex.Branch.commit "update" ifM isDirect ( do @@ -342,7 +342,7 @@ mergeLocal mergeconfig resolvemergeoverride currbranch@(Just _, _) = where go Nothing = stop go (Just syncbranch) = do - showStart "merge" $ Git.Ref.describe syncbranch + showStart' "merge" (Just $ Git.Ref.describe syncbranch) next $ next $ merge currbranch mergeconfig resolvemergeoverride Git.Branch.ManualCommit syncbranch mergeLocal _ _ (Nothing, madj) = do b <- inRepo Git.Branch.currentUnsafe @@ -401,7 +401,7 @@ updateBranch syncbranch updateto g = pullRemote :: SyncOptions -> [Git.Merge.MergeConfig] -> Remote -> CurrBranch -> CommandStart pullRemote o mergeconfig remote branch = stopUnless (pure $ pullOption o && wantpull) $ do - showStart "pull" (Remote.name remote) + showStart' "pull" (Just (Remote.name remote)) next $ do showOutput stopUnless fetch $ @@ -438,7 +438,7 @@ mergeRemote remote currbranch mergeconfig resolvemergeoverride = ifM isBareRepo pushRemote :: SyncOptions -> Remote -> CurrBranch -> CommandStart pushRemote _o _remote (Nothing, _) = stop pushRemote o remote (Just branch, _) = stopUnless (pure (pushOption o) <&&> needpush) $ do - showStart "push" (Remote.name remote) + showStart' "push" (Just (Remote.name remote)) next $ next $ do showOutput ok <- inRepoWithSshOptionsTo (Remote.repo remote) gc $ @@ -651,7 +651,7 @@ syncFile ebloom rs af k = onlyActionOn' k $ do , return [] ) get have = includeCommandAction $ do - showStart' "get" k (mkActionItem af) + showStartKey "get" k (mkActionItem af) next $ next $ getKey' k af have wantput r @@ -703,7 +703,7 @@ seekExportContent rs = or <$> forM rs go cleanupLocal :: CurrBranch -> CommandStart cleanupLocal (Nothing, _) = stop cleanupLocal (Just currb, _) = do - showStart "cleanup" "local" + showStart' "cleanup" (Just "local") next $ next $ do delbranch $ syncBranch currb delbranch $ syncBranch $ Git.Ref.base $ Annex.Branch.name @@ -717,7 +717,7 @@ cleanupLocal (Just currb, _) = do cleanupRemote :: Remote -> CurrBranch -> CommandStart cleanupRemote _ (Nothing, _) = stop cleanupRemote remote (Just b, _) = do - showStart "cleanup" (Remote.name remote) + showStart' "cleanup" (Just (Remote.name remote)) next $ next $ inRepo $ Git.Command.runBool [ Param "push" diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index 75e438d79..1a5da42b2 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -58,7 +58,7 @@ seek o = commandAction $ start (fromInteger $ sizeOption o) (testRemote o) start :: Int -> RemoteName -> CommandStart start basesz name = do - showStart "testremote" name + showStart' "testremote" (Just name) fast <- Annex.getState Annex.fast r <- either giveup disableExportTree =<< Remote.byName' name rs <- catMaybes <$> mapM (adjustChunkSize r) (chunkSizes basesz fast) diff --git a/Command/Trust.hs b/Command/Trust.hs index 66a1c81b3..5f161af26 100644 --- a/Command/Trust.hs +++ b/Command/Trust.hs @@ -27,7 +27,7 @@ trustCommand c level = withWords start where start ws = do let name = unwords ws - showStart c name + showStart' c (Just name) u <- Remote.nameToUUID name next $ perform u perform uuid = do diff --git a/Command/Ungroup.hs b/Command/Ungroup.hs index ddcdba466..357f5685f 100644 --- a/Command/Ungroup.hs +++ b/Command/Ungroup.hs @@ -23,7 +23,7 @@ seek = withWords start start :: [String] -> CommandStart start (name:g:[]) = do - showStart "ungroup" name + showStart' "ungroup" (Just name) u <- Remote.nameToUUID name next $ perform u g start _ = giveup "Specify a repository and a group." diff --git a/Command/Unused.hs b/Command/Unused.hs index 916e6db25..27018cf38 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -70,7 +70,7 @@ start o = do Just "." -> (".", checkUnused refspec) Just "here" -> (".", checkUnused refspec) Just n -> (n, checkRemoteUnused n refspec) - showStart "unused" name + showStart' "unused" (Just name) next perform checkUnused :: RefSpec -> CommandPerform @@ -338,5 +338,5 @@ startUnused message unused badunused tmpunused maps n = search case M.lookup n m of Nothing -> search rest Just key -> do - showStart message (show n) + showStart' message (Just $ show n) next $ a key diff --git a/Command/Upgrade.hs b/Command/Upgrade.hs index 696b794a5..1c3e62695 100644 --- a/Command/Upgrade.hs +++ b/Command/Upgrade.hs @@ -23,7 +23,7 @@ seek = withNothing start start :: CommandStart start = do - showStart "upgrade" "." + showStart' "upgrade" Nothing whenM (isNothing <$> getVersion) $ do initialize Nothing Nothing r <- upgrade False latestVersion diff --git a/Command/VAdd.hs b/Command/VAdd.hs index c94ce5722..800573732 100644 --- a/Command/VAdd.hs +++ b/Command/VAdd.hs @@ -23,7 +23,7 @@ seek = withWords start start :: [String] -> CommandStart start params = do - showStart "vadd" "" + showStart' "vadd" Nothing withCurrentView $ \view -> do let (view', change) = refineView view $ map parseViewParam $ reverse params diff --git a/Command/VCycle.hs b/Command/VCycle.hs index 28326e16f..9c1e24e0d 100644 --- a/Command/VCycle.hs +++ b/Command/VCycle.hs @@ -27,7 +27,7 @@ start = go =<< currentView where go Nothing = giveup "Not in a view." go (Just v) = do - showStart "vcycle" "" + showStart' "vcycle" Nothing let v' = v { viewComponents = vcycle [] (viewComponents v) } if v == v' then do diff --git a/Command/VFilter.hs b/Command/VFilter.hs index 130e2550c..fa545cddd 100644 --- a/Command/VFilter.hs +++ b/Command/VFilter.hs @@ -21,7 +21,7 @@ seek = withWords start start :: [String] -> CommandStart start params = do - showStart "vfilter" "" + showStart' "vfilter" Nothing withCurrentView $ \view -> do let view' = filterView view $ map parseViewParam $ reverse params diff --git a/Command/VPop.hs b/Command/VPop.hs index 58411001b..71da586c2 100644 --- a/Command/VPop.hs +++ b/Command/VPop.hs @@ -28,7 +28,7 @@ start ps = go =<< currentView where go Nothing = giveup "Not in a view." go (Just v) = do - showStart "vpop" (show num) + showStart' "vpop" (Just $ show num) removeView v (oldvs, vs) <- splitAt (num - 1) . filter (sameparentbranch v) <$> recentViews diff --git a/Command/View.hs b/Command/View.hs index 513e6d10c..298fad0ee 100644 --- a/Command/View.hs +++ b/Command/View.hs @@ -27,7 +27,7 @@ seek = withWords start start :: [String] -> CommandStart start [] = giveup "Specify metadata to include in view" start ps = do - showStart "view" "" + showStart' "view" Nothing view <- mkView ps go view =<< currentView where diff --git a/Command/Wanted.hs b/Command/Wanted.hs index fc1fa86bd..ac4175ecd 100644 --- a/Command/Wanted.hs +++ b/Command/Wanted.hs @@ -35,7 +35,7 @@ cmd' name desc getter setter = noMessages $ start (rname:[]) = go rname (performGet getter) start (rname:expr:[]) = go rname $ \uuid -> do allowMessages - showStart name rname + showStart' name (Just rname) performSet setter expr uuid start _ = giveup "Specify a repository." diff --git a/Command/Whereis.hs b/Command/Whereis.hs index c5543bc66..295d11994 100644 --- a/Command/Whereis.hs +++ b/Command/Whereis.hs @@ -53,7 +53,7 @@ start remotemap file key = startKeys remotemap key (mkActionItem afile) startKeys :: M.Map UUID Remote -> Key -> ActionItem -> CommandStart startKeys remotemap key ai = do - showStart' "whereis" key ai + showStartKey "whereis" key ai next $ perform remotemap key perform :: M.Map UUID Remote -> Key -> CommandPerform diff --git a/Logs/Web.hs b/Logs/Web.hs index ba71cb17d..abea00db6 100644 --- a/Logs/Web.hs +++ b/Logs/Web.hs @@ -100,15 +100,15 @@ removeTempUrl :: Key -> Annex () removeTempUrl key = Annex.changeState $ \s -> s { Annex.tempurls = M.delete key (Annex.tempurls s) } -data Downloader = WebDownloader | QuviDownloader | OtherDownloader +data Downloader = WebDownloader | YoutubeDownloader | QuviDownloader | OtherDownloader deriving (Eq, Show) {- To keep track of how an url is downloaded, it's mangled slightly in - - the log. For quvi, "quvi:" is prefixed. For urls that are handled by - - some other remote, ":" is prefixed. -} + - the log, with a prefix indicating when a Downloader is used. -} setDownloader :: URLString -> Downloader -> String setDownloader u WebDownloader = u setDownloader u QuviDownloader = "quvi:" ++ u +setDownloader u YoutubeDownloader = "yt:" ++ u setDownloader u OtherDownloader = ":" ++ u setDownloader' :: URLString -> Remote -> String @@ -118,6 +118,9 @@ setDownloader' u r getDownloader :: URLString -> (URLString, Downloader) getDownloader u = case separate (== ':') u of - ("quvi", u') -> (u', QuviDownloader) + ("yt", u') -> (u', YoutubeDownloader) + -- quvi is not used any longer; youtube-dl should be able to handle + -- all urls it did. + ("quvi", u') -> (u', YoutubeDownloader) ("", u') -> (u', OtherDownloader) _ -> (u, WebDownloader) diff --git a/Messages.hs b/Messages.hs index ff13b31ee..08a7bb719 100644 --- a/Messages.hs +++ b/Messages.hs @@ -7,9 +7,10 @@ module Messages ( showStart, + showStart', + showStartKey, ActionItem, mkActionItem, - showStart', showNote, showAction, showSideAction, @@ -66,8 +67,14 @@ showStart command file = outputMessage json $ where json = JSON.start command (Just file) Nothing -showStart' :: String -> Key -> ActionItem -> Annex () -showStart' command key i = outputMessage json $ +showStart' :: String -> Maybe String -> Annex () +showStart' command mdesc = outputMessage json $ + command ++ (maybe "" (" " ++) mdesc) ++ " " + where + json = JSON.start command Nothing Nothing + +showStartKey :: String -> Key -> ActionItem -> Annex () +showStartKey command key i = outputMessage json $ command ++ " " ++ actionItemDesc i key ++ " " where json = JSON.start command (actionItemWorkTreeFile i) (Just key) diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index f7e9759a4..83e08c5aa 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -107,7 +107,7 @@ fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever fileRetriever a k m callback = do f <- prepTmp k a f k m - callback (FileContent f) + pruneTmpWorkDirBefore f (callback . FileContent) -- A Retriever that generates a lazy ByteString containing the Key's -- content, and passes it to a callback action which will fully consume it diff --git a/Remote/Web.hs b/Remote/Web.hs index 233c17eb3..5aa1edd3e 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -19,8 +19,7 @@ import Logs.Web import Annex.UUID import Utility.Metered import qualified Annex.Url as Url -import Annex.Quvi -import qualified Utility.Quvi as Quvi +import Annex.YoutubeDl remote :: RemoteType remote = RemoteType @@ -80,9 +79,7 @@ downloadKey key _af dest p = unVerified $ get =<< getWebUrls key untilTrue urls $ \u -> do let (u', downloader) = getDownloader u case downloader of - QuviDownloader -> do - flip (downloadUrl key p) dest - =<< withQuviOptions Quvi.queryLinks [Quvi.httponly, Quvi.quiet] u' + YoutubeDownloader -> youtubeDlTo key u' dest _ -> downloadUrl key p [u'] dest downloadKeyCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool @@ -109,8 +106,7 @@ checkKey' key us = firsthit us (Right False) $ \u -> do let (u', downloader) = getDownloader u showChecking u' case downloader of - QuviDownloader -> - Right <$> withQuviOptions Quvi.check [Quvi.httponly, Quvi.quiet] u' + YoutubeDownloader -> youtubeDlCheck u' _ -> do Url.withUrlOptions $ catchMsgIO . Url.checkBoth u' (keySize key) @@ -126,4 +122,4 @@ getWebUrls :: Key -> Annex [URLString] getWebUrls key = filter supported <$> getUrls key where supported u = snd (getDownloader u) - `elem` [WebDownloader, QuviDownloader] + `elem` [WebDownloader, YoutubeDownloader] diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index 05b5623a6..9a48ad173 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -67,7 +67,7 @@ data GitConfig = GitConfig , annexSyncContent :: Configurable Bool , annexDebug :: Bool , annexWebOptions :: [String] - , annexQuviOptions :: [String] + , annexYoutubeDlOptions :: [String] , annexAriaTorrentOptions :: [String] , annexWebDownloadCommand :: Maybe String , annexCrippledFileSystem :: Bool @@ -127,7 +127,7 @@ extractGitConfig r = GitConfig getmaybebool (annex "synccontent") , annexDebug = getbool (annex "debug") False , annexWebOptions = getwords (annex "web-options") - , annexQuviOptions = getwords (annex "quvi-options") + , annexYoutubeDlOptions = getwords (annex "youtube-dl-options") , annexAriaTorrentOptions = getwords (annex "aria-torrent-options") , annexWebDownloadCommand = getmaybe (annex "web-download-command") , annexCrippledFileSystem = getbool (annex "crippledfilesystem") False diff --git a/Types/Transfer.hs b/Types/Transfer.hs index ade8fc763..1b54f8511 100644 --- a/Types/Transfer.hs +++ b/Types/Transfer.hs @@ -5,9 +5,12 @@ - Licensed under the GNU GPL version 3 or higher. -} +{-# LANGUAGE FlexibleInstances #-} + module Types.Transfer where import Types +import Types.Remote (Verification(..)) import Utility.PID import Utility.QuickCheck @@ -66,3 +69,25 @@ instance Arbitrary TransferInfo where -- associated file cannot be empty (but can be Nothing) <*> (AssociatedFile <$> arbitrary `suchThat` (/= Just "")) <*> arbitrary + +class Observable a where + observeBool :: a -> Bool + observeFailure :: a + +instance Observable Bool where + observeBool = id + observeFailure = False + +instance Observable (Bool, Verification) where + observeBool = fst + observeFailure = (False, UnVerified) + +instance Observable (Either e Bool) where + observeBool (Left _) = False + observeBool (Right b) = b + observeFailure = Right False + +instance Observable (Maybe a) where + observeBool (Just _) = True + observeBool Nothing = False + observeFailure = Nothing diff --git a/Utility/HtmlDetect.hs b/Utility/HtmlDetect.hs new file mode 100644 index 000000000..57a56c95f --- /dev/null +++ b/Utility/HtmlDetect.hs @@ -0,0 +1,35 @@ +{- html detection + - + - Copyright 2017 Joey Hess <id@joeyh.name> + - + - License: BSD-2-clause + -} + +module Utility.HtmlDetect where + +import Text.HTML.TagSoup +import Data.Char + +-- | Detect if a string is a html document. +-- +-- The document many not be valid, and will still be detected as html, +-- as long as it starts with a "<html>" or "<!DOCTYPE html>" tag. +-- +-- Html fragments like "<p>this</p>" are not detected as being html, +-- although some browsers may chose to render them as html. +isHtml :: String -> Bool +isHtml = evaluate . canonicalizeTags . parseTags . shorten + where + -- We only care about the beginning of the file, + -- so although tagsoup parses lazily anyway, truncate it. + shorten = take 16384 + evaluate (TagOpen "!DOCTYPE" ((t, _):_):_) = map toLower t == "html" + evaluate (TagOpen "html" _:_) = True + -- Allow some leading whitespace before the tag. + evaluate (TagText t:rest) + | all isSpace t = evaluate rest + | otherwise = False + -- It would be pretty weird to have a html comment before the html + -- tag, but easy to allow for. + evaluate (TagComment _:rest) = evaluate rest + evaluate _ = False diff --git a/Utility/Quvi.hs b/Utility/Quvi.hs deleted file mode 100644 index ff1ad854c..000000000 --- a/Utility/Quvi.hs +++ /dev/null @@ -1,162 +0,0 @@ -{- querying quvi (import qualified) - - - - Copyright 2013 Joey Hess <id@joeyh.name> - - - - License: BSD-2-clause - -} - -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE FlexibleContexts #-} - -module Utility.Quvi where - -import Common -import Utility.Url - -import Data.Aeson -import qualified Data.ByteString as B -import qualified Data.ByteString.Lazy as BL -import qualified Data.Map as M -import Network.URI (uriAuthority, uriRegName) -import Data.Char - -data QuviVersion - = Quvi04 - | Quvi09 - | NoQuvi - deriving (Show) - -data Page = Page - { pageTitle :: String - , pageLinks :: [Link] - } deriving (Show) - -data Link = Link - { linkSuffix :: Maybe String - , linkUrl :: URLString - } deriving (Show) - -{- JSON instances for quvi 0.4. -} -instance FromJSON Page where - parseJSON (Object v) = Page - <$> v .: "page_title" - <*> v .: "link" - parseJSON _ = mzero - -instance FromJSON Link where - parseJSON (Object v) = Link - <$> v .:? "file_suffix" - <*> v .: "url" - parseJSON _ = mzero - -{- "enum" format used by quvi 0.9 -} -parseEnum :: String -> Maybe Page -parseEnum s = Page - <$> get "QUVI_MEDIA_PROPERTY_TITLE" - <*> ((:[]) <$> - ( Link - <$> Just <$> (get "QUVI_MEDIA_STREAM_PROPERTY_CONTAINER") - <*> get "QUVI_MEDIA_STREAM_PROPERTY_URL" - ) - ) - where - get = flip M.lookup m - m = M.fromList $ map (separate (== '=')) $ lines s - -probeVersion :: IO QuviVersion -probeVersion = catchDefaultIO NoQuvi $ - examine <$> processTranscript "quvi" ["--version"] Nothing - where - examine (s, True) - | "quvi v0.4" `isInfixOf` s = Quvi04 - | otherwise = Quvi09 - examine _ = NoQuvi - -type Query a = QuviVersion -> [CommandParam] -> URLString -> IO a - -{- Throws an error when quvi is not installed. -} -forceQuery :: Query (Maybe Page) -forceQuery v ps url = query' v ps url `catchNonAsync` onerr - where - onerr e = ifM (inPath "quvi") - ( giveup ("quvi failed: " ++ show e) - , giveup "quvi is not installed" - ) - -{- Returns Nothing if the page is not a video page, or quvi is not - - installed. -} -query :: Query (Maybe Page) -query v ps url = flip catchNonAsync (const $ return Nothing) (query' v ps url) - -query' :: Query (Maybe Page) -query' Quvi09 ps url = parseEnum - <$> readQuvi (toCommand $ [Param "dump", Param "-p", Param "enum"] ++ ps ++ [Param url]) -query' Quvi04 ps url = do - let p = proc "quvi" (toCommand $ ps ++ [Param url]) - decode . BL.fromStrict - <$> withHandle StdoutHandle createProcessSuccess p B.hGetContents -query' NoQuvi _ _ = return Nothing - -queryLinks :: Query [URLString] -queryLinks v ps url = maybe [] (map linkUrl . pageLinks) <$> query v ps url - -{- Checks if quvi can still find a download link for an url. - - If quvi is not installed, returns False. -} -check :: Query Bool -check v ps url = maybe False (not . null . pageLinks) <$> query v ps url - -{- Checks if an url is supported by quvi, as quickly as possible - - (without hitting it if possible), and without outputting - - anything. Also returns False if quvi is not installed. -} -supported :: QuviVersion -> URLString -> IO Bool -supported NoQuvi _ = return False -supported Quvi04 url = boolSystem "quvi" - [ Param "--verbosity", Param "mute" - , Param "--support" - , Param url - ] -{- Use quvi-info to see if the url's domain is supported. - - If so, have to do a online verification of the url. -} -supported Quvi09 url = (firstlevel <&&> secondlevel) - `catchNonAsync` (\_ -> return False) - where - firstlevel = case uriAuthority =<< parseURIRelaxed url of - Nothing -> return False - Just auth -> do - let domain = map toLower $ uriRegName auth - let basedomain = intercalate "." $ reverse $ take 2 $ reverse $ splitc '.' domain - any (\h -> domain `isSuffixOf` h || basedomain `isSuffixOf` h) - . map (map toLower) <$> listdomains Quvi09 - secondlevel = snd <$> processTranscript "quvi" - (toCommand [Param "dump", Param "-o", Param url]) Nothing - -listdomains :: QuviVersion -> IO [String] -listdomains Quvi09 = concatMap (splitc ',') - . concatMap (drop 1 . words) - . filter ("domains: " `isPrefixOf`) . lines - <$> readQuvi (toCommand [Param "info", Param "-p", Param "domains"]) -listdomains _ = return [] - -type QuviParams = QuviVersion -> [CommandParam] - -{- Disables progress, but not information output. -} -quiet :: QuviParams --- Cannot use quiet as it now disables informational output. --- No way to disable progress. -quiet Quvi09 = [Param "--verbosity", Param "verbose"] -quiet Quvi04 = [Param "--verbosity", Param "quiet"] -quiet NoQuvi = [] - -{- Only return http results, not streaming protocols. -} -httponly :: QuviParams --- No way to do it with 0.9? -httponly Quvi04 = [Param "-c", Param "http"] -httponly _ = [] -- No way to do it with 0.9? - -readQuvi :: [String] -> IO String -readQuvi ps = withHandle StdoutHandle createProcessSuccess p $ \h -> do - r <- hGetContentsStrict h - hClose h - return r - where - p = proc "quvi" ps diff --git a/debian/control b/debian/control index b34a79002..a20506002 100644 --- a/debian/control +++ b/debian/control @@ -25,6 +25,7 @@ Build-Depends: libghc-dlist-dev, libghc-uuid-dev, libghc-aeson-dev, + libghc-tagsoup-dev, libghc-unordered-containers-dev, libghc-ifelse-dev, libghc-bloomfilter-dev, @@ -106,7 +107,7 @@ Recommends: lsof, gnupg, bind9-host, - quvi, + youtube-dl, git-remote-gcrypt (>= 0.20130908-6), nocache, aria2, diff --git a/doc/devblog/youtube-dl_day_3.mdwn b/doc/devblog/youtube-dl_day_3.mdwn new file mode 100644 index 000000000..bc868d25c --- /dev/null +++ b/doc/devblog/youtube-dl_day_3.mdwn @@ -0,0 +1,7 @@ +Finished up youtube-dl integration today, including all the edge cases in +`addurl` and honoring annex.diskreserve. + +I changed my mind about `git annex addurl --relaxed`; it seems better for +it to be slower than before, but not have surprising behavior, than to be +fast but potentially surprising. If it's too slow, add `--raw` to avoid +using youtube-dl. diff --git a/doc/forum/downloading_mp4_by_default_with_importfeed/comment_1_c9e5a3700764faa33d2c68de6a8236dc._comment b/doc/forum/downloading_mp4_by_default_with_importfeed/comment_1_c9e5a3700764faa33d2c68de6a8236dc._comment index 23282e835..07691d823 100644 --- a/doc/forum/downloading_mp4_by_default_with_importfeed/comment_1_c9e5a3700764faa33d2c68de6a8236dc._comment +++ b/doc/forum/downloading_mp4_by_default_with_importfeed/comment_1_c9e5a3700764faa33d2c68de6a8236dc._comment @@ -8,4 +8,8 @@ option to make it prefer mp4, you can pass it using the annex.quvi-options git configuration setting. Or maybe it has a config file. I don't know how to configure quvi to do that, but if it's possible to, you should be able to get git-annex to run it with that configuration. + +Update: Now git-annex uses youtube-dl, so you can use the +annex.youtube-dl-options git configuration setting to configure youtube-dl +to download your preferred format. """]] diff --git a/doc/git-annex-addurl.mdwn b/doc/git-annex-addurl.mdwn index aace42009..a43976b56 100644 --- a/doc/git-annex-addurl.mdwn +++ b/doc/git-annex-addurl.mdwn @@ -10,8 +10,8 @@ git annex addurl `[url ...]` Downloads each url to its own file, which is added to the annex. -When `quvi` is installed, urls are automatically tested to see if they -point to a video hosting site, and the video is downloaded instead. +When `youtube-dl` is installed, it's used to check for a video embedded in +a web page at the url, and that is added to the annex instead. Urls to torrent files (including magnet links) will cause the content of the torrent to be downloaded, using `aria2c`. @@ -30,12 +30,17 @@ be used to get better filenames. * `--relaxed` - Avoid storing the size of the url's content, and accept whatever - content is there at a future point. (Implies `--fast`.) + Don't immediately download the url, and avoid storing the size of the + url's content. This makes git-annex accept whatever content is there + at a future point. + This is the fastest option, but it still has to access the network + to check if the url contains embedded media. When adding large numbers + of urls, using `--relaxed --raw` is much faster. + * `--raw` - Prevent special handling of urls by quvi, bittorrent, and other + Prevent special handling of urls by youtube-dl, bittorrent, and other special remotes. This will for example, make addurl download the .torrent file and not the contents it points to. diff --git a/doc/git-annex-importfeed.mdwn b/doc/git-annex-importfeed.mdwn index 241d369af..2f146fbfe 100644 --- a/doc/git-annex-importfeed.mdwn +++ b/doc/git-annex-importfeed.mdwn @@ -8,14 +8,13 @@ git annex importfeed `[url ...]` # DESCRIPTION -Imports the contents of podcast feeds. Only downloads files whose +Imports the contents of podcasts and other feeds. Only downloads files whose content has not already been added to the repository before, so you can delete, rename, etc the resulting files and repeated runs won't duplicate them. -When quvi is installed, links in the feed are tested to see if they -are on a video hosting site, and the video is downloaded. This allows -importing e.g., YouTube playlists. +When `youtube-dl` is installed, it's used to download links in the feed. +This allows importing e.g., YouTube playlists. To make the import process add metadata to the imported files from the feed, `git config annex.genmetadata true` @@ -38,6 +37,23 @@ To make the import process add metadata to the imported files from the feed, These options behave the same as when using [[git-annex-addurl]](1). +* `--fast` + + Avoid immediately downloading urls. The url is still checked + (via HEAD) to verify that it exists, and to get its size if possible. + +* `--relaxed` + + Don't immediately download urls, and avoid storing the size of the + url's content. This makes git-annex accept whatever content is there + at a future point. + +* `--raw` + + Prevent special handling of urls by youtube-dl, bittorrent, and other + special remotes. This will for example, make importfeed + download a .torrent file and not the contents it points to. + # SEE ALSO [[git-annex]](1) diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index c3aa8b991..8f84f1d9e 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1315,10 +1315,16 @@ Here are all the supported configuration settings. Options to pass when running wget or curl. For example, to force IPv4 only, set it to "-4" -* `annex.quvi-options` +* `annex.youtube-dl-options` - Options to pass to quvi when using it to find the url to download for a - video. + Options to pass to youtube-dl when using it to find the url to download + for a video. + + Some options may break git-annex's integration with youtube-dl. For + example, the --output option could cause it to store files somewhere + git-annex won't find them. Avoid setting here or in the youtube-dl config + file any options that cause youtube-dl to download more than one file, + or to store the file anywhere other than the current working directory. * `annex.aria-torrent-options` diff --git a/doc/tips/downloading_podcasts.mdwn b/doc/tips/downloading_podcasts.mdwn index 2926c6710..13b8300db 100644 --- a/doc/tips/downloading_podcasts.mdwn +++ b/doc/tips/downloading_podcasts.mdwn @@ -73,13 +73,16 @@ and transferring to your laptop on demand. ## youtube playlists -If your git-annex is also built with quvi support, you can also use -`git annex importfeed` on youtube playlists. It will automatically download -the videos linked to by the playlist. - -For this you need an rss file containing links to the videos. -For example, this url currently works: -<http://gdata.youtube.com/feeds/api/playlists/PLz8ZG1e9MPlzefklz1Gv79icjywTXycR-> +You can also use `git annex importfeed` on youtube playlists. +It will use [youtube-dl](https://rg3.github.io/youtube-dl/) to automatically +download the videos linked to by the playlist. + +To download a youtube playlist, you need to find the feed associated with that +playlist, and pass it to `git annex importfeed`. There does not seem to be +an easy link anywhere to get the feed, but you can construct its url +manually. For a playlist like +"https://www.youtube.com/playlist?list=PL4F80C7D2DC8D9B6C", the +feed is "https://www.youtube.com/feeds/videos.xml?playlist_id=PL4F80C7D2DC8D9B6C" ## metadata diff --git a/doc/tips/using_the_web_as_a_special_remote.mdwn b/doc/tips/using_the_web_as_a_special_remote.mdwn index 2dc3419ad..8bd9c7457 100644 --- a/doc/tips/using_the_web_as_a_special_remote.mdwn +++ b/doc/tips/using_the_web_as_a_special_remote.mdwn @@ -68,41 +68,34 @@ number takes that many paths from the end. ## videos -<a name=quvi></a> +<a name=videos></a> There's support for downloading videos from sites like YouTube, Vimeo, -and many more. This relies on [quvi](http://quvi.sourceforge.net/) to find -urls to the actual videos files. +and many more. This relies on [youtube-dl](https://rg3.github.io/youtube-dl/) +to download the videos. -When you have quvi installed, you can just +When you have youtube-dl installed, you can just `git annex addurl http://youtube.com/foo` and it will detect that it is a video and download the video content for offline viewing. Later, in another clone of the repository, you can run `git annex get` on -the file and it will also be downloaded with the help of quvi. This works +the file and it will also be downloaded with youtube-dl. This works even if the video host has transcoded or otherwise changed the video in the meantime; the assumption is that these video files are equivalent. -There is an `annex.quvi-options` configuration setting that can be used +There is an `annex.youtube-dl-options` configuration setting that can be used to pass parameters to quvi. For example, you could set `git config -annex.quvi-options "--format low"` to configure it to download low -quality videos from YouTube. - -Note that for performance reasons, the url is not checked for redirects, -so some shortened urls will not be detected. You can -either load the short url in a browser to get the full url, or you -can force use of quvi with redirect detection, by prepending "quvi:" to the -url. - -To download a youtube playlist, you need to find the feed associated with that -playlist, and pass it to `git annex importfeed`. There does not seem to be -an easy link anywhere to get the feed, but you can construct its url +annex.youtube-dl-options "--format worst"` to configure it to download low +quality videos from YouTube. Note that the youtube-dl configuration files +are not read when git-annex runs youtube-dl, to avoid config settings that +break its integration. + +To download a youtube playlist, you need to find the RSS feed associated with +that playlist, and pass it to `git annex importfeed`. There does not seem to +be an easy link anywhere to get the RSS feed, but you can construct its url manually. For a playlist like "https://www.youtube.com/playlist?list=PL4F80C7D2DC8D9B6C", the -feed is "http://gdata.youtube.com/feeds/api/playlists/PL4F80C7D2DC8D9B6C" - -More details about youtube feeds at <http://googlesystem.blogspot.com/2008/01/youtube-feeds.html> --- `git-annex importfeed` should handle all of them. +feed is "https://www.youtube.com/feeds/videos.xml?playlist_id=PL4F80C7D2DC8D9B6C" ## bittorrent diff --git a/doc/todo/switch_from_quvi_to_youtube-dl.mdwn b/doc/todo/switch_from_quvi_to_youtube-dl.mdwn index cfdd8a8a6..3fa7b0ccb 100644 --- a/doc/todo/switch_from_quvi_to_youtube-dl.mdwn +++ b/doc/todo/switch_from_quvi_to_youtube-dl.mdwn @@ -16,21 +16,43 @@ urls, see for example http://bugs.debian.org/874321) So, switching to youtube-dl would probably need a new switch, like `git annex addurl --rip` that enables using it. -Currently `git annex importfeed` automatically tests for video urls with -quvi; it would also need to support `--rip`. +(Importfeed only treats links in the feed as video urls, not enclosures, +so this problem does not affect it and it would not need a new switch.) -Both of those changes would need changes to user's workflows and cron jobs. -git-annex could keep supporting quvi for some time, and warn when it uses -quvi, to help with the transition. +That would need changes to users' workflows. git-annex could keep +supporting quvi for some time, and warn when it uses quvi, to +help with the transition. + +> Alternatively, git-annex addurl could download the url first, and then +> check the file to see if it looks like html. If so, run youtube-dl (which +> unfortunately has to download it again) and see if it manages to rip +> media from it. This way, addurl of non-html files does not have extra +> overhead, and the redundant download is fairly small compared to ripping +> the media. Only the unusual case where addurl is being used on html that +> does not contain media becomes more expensive. +> +> However, for --relaxed, running youtube-dl --get-filename would be +> significantly more expensive since it hits the network. It seems that +> --relaxed would need to change to not rip videos; users who want that +> could use --fast. +> +> --fast already hits the network, but +> if it uses youtube-dl --get-filename, it would fall afoul of +> bugs like <http://bugs.debian.org/874321>, although those can be worked +> around (/dev/null stderr in cast youtube-dl crashes) Another gotcha is playlists. youtube-dl downloads playlists automatically. But, git-annex needs to record an url that downloads a single file so that `git annex get` works right. So, playlists will need to be disabled when git-annex runs youtube-dl. But, `--no-playlist` does not always disable -playlists. Best option seems to be `--playlist-items 0` which works for +playlists. Best option seems to be `--no-playlist --playlist-items 0` which works for non-playlists, and downloads only 1 item from playlists (hopefully a fairly stable item, but who knows..). +(`git annex importfeed` handles youtube playlist downloads, but needs the +user to find the url to the rss feed for the playlist. Youtube still has +these, although it makes them hard to find.) + Another gotcha is that youtube-dl's -o option does not fully determine the filename it downloads to. Sometims it will tack on an additional extension (seen with youtube videos where it added a ".mkv"). @@ -38,3 +60,12 @@ And --get-filename does not report the actual filename when that happens. This seems to be due to format merging by ffmpeg; with -f best, it does not merge and so does not do that. <https://github.com/rg3/youtube-dl/issues/14864> + +To do disk free space checking will need a different technique than +git-annex normally uses, because youtube-dl does not provide an easy way to +query for size. Could use --dump-json, but that would require downloading +the web page yet again, so too expensive.. and, the json seems to have +"filesize: null" for youtube videos. What does work is the --max-filesize +option, which makes youtube-dl abort if it's too big. + +> [[done]] --[[Joey]] diff --git a/git-annex.cabal b/git-annex.cabal index 5d46caed3..4e2269a51 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -347,6 +347,7 @@ Executable git-annex persistent, persistent-template, aeson, + tagsoup, unordered-containers, feed (>= 0.3.9), regex-tdfa, @@ -530,7 +531,6 @@ Executable git-annex Annex.Path Annex.Perms Annex.Queue - Annex.Quvi Annex.ReplaceFile Annex.SpecialRemote Annex.Ssh @@ -546,6 +546,7 @@ Executable git-annex Annex.View.ViewedFile Annex.Wanted Annex.WorkTree + Annex.YoutubeDl Assistant Assistant.Alert Assistant.Alert.Utility @@ -1001,6 +1002,7 @@ Executable git-annex Utility.Glob Utility.Gpg Utility.Hash + Utility.HtmlDetect Utility.HumanNumber Utility.HumanTime Utility.InodeCache @@ -1032,7 +1034,6 @@ Executable git-annex Utility.Process Utility.Process.Shim Utility.QuickCheck - Utility.Quvi Utility.Rsync Utility.SRV Utility.SafeCommand |