-rw-r--r--  Annex.hs | 3
-rw-r--r--  Annex/Content.hs | 62
-rw-r--r--  Annex/Locations.hs | 16
-rw-r--r--  Annex/Notification.hs | 55
-rw-r--r--  Annex/Quvi.hs | 33
-rw-r--r--  Annex/Transfer.hs | 20
-rw-r--r--  Annex/YoutubeDl.hs | 142
-rw-r--r--  BuildInfo.hs | 1
-rw-r--r--  CHANGELOG | 14
-rw-r--r--  Command/AddUrl.hs | 372
-rw-r--r--  Command/Adjust.hs | 2
-rw-r--r--  Command/Config.hs | 4
-rw-r--r--  Command/Dead.hs | 2
-rw-r--r--  Command/Describe.hs | 2
-rw-r--r--  Command/Direct.hs | 4
-rw-r--r--  Command/Drop.hs | 4
-rw-r--r--  Command/DropKey.hs | 2
-rw-r--r--  Command/DropUnused.hs | 2
-rw-r--r--  Command/EnableRemote.hs | 4
-rw-r--r--  Command/EnableTor.hs | 2
-rw-r--r--  Command/Expire.hs | 4
-rw-r--r--  Command/Forget.hs | 2
-rw-r--r--  Command/FromKey.hs | 2
-rw-r--r--  Command/Fsck.hs | 2
-rw-r--r--  Command/Get.hs | 2
-rw-r--r--  Command/Group.hs | 2
-rw-r--r--  Command/GroupWanted.hs | 2
-rw-r--r--  Command/ImportFeed.hs | 110
-rw-r--r--  Command/Indirect.hs | 4
-rw-r--r--  Command/Init.hs | 2
-rw-r--r--  Command/InitRemote.hs | 2
-rw-r--r--  Command/Map.hs | 2
-rw-r--r--  Command/Merge.hs | 2
-rw-r--r--  Command/MetaData.hs | 4
-rw-r--r--  Command/Move.hs | 2
-rw-r--r--  Command/Multicast.hs | 8
-rw-r--r--  Command/NumCopies.hs | 2
-rw-r--r--  Command/P2P.hs | 4
-rw-r--r--  Command/RegisterUrl.hs | 4
-rw-r--r--  Command/Reinit.hs | 2
-rw-r--r--  Command/ResolveMerge.hs | 2
-rw-r--r--  Command/Schedule.hs | 2
-rw-r--r--  Command/SetPresentKey.hs | 2
-rw-r--r--  Command/Sync.hs | 14
-rw-r--r--  Command/TestRemote.hs | 2
-rw-r--r--  Command/Trust.hs | 2
-rw-r--r--  Command/Ungroup.hs | 2
-rw-r--r--  Command/Unused.hs | 4
-rw-r--r--  Command/Upgrade.hs | 2
-rw-r--r--  Command/VAdd.hs | 2
-rw-r--r--  Command/VCycle.hs | 2
-rw-r--r--  Command/VFilter.hs | 2
-rw-r--r--  Command/VPop.hs | 2
-rw-r--r--  Command/View.hs | 2
-rw-r--r--  Command/Wanted.hs | 2
-rw-r--r--  Command/Whereis.hs | 2
-rw-r--r--  Logs/Web.hs | 11
-rw-r--r--  Messages.hs | 13
-rw-r--r--  Remote/Helper/Special.hs | 2
-rw-r--r--  Remote/Web.hs | 12
-rw-r--r--  Types/GitConfig.hs | 4
-rw-r--r--  Types/Transfer.hs | 25
-rw-r--r--  Utility/HtmlDetect.hs | 35
-rw-r--r--  Utility/Quvi.hs | 162
-rw-r--r--  debian/control | 3
-rw-r--r--  doc/devblog/youtube-dl_day_3.mdwn | 7
-rw-r--r--  doc/forum/downloading_mp4_by_default_with_importfeed/comment_1_c9e5a3700764faa33d2c68de6a8236dc._comment | 4
-rw-r--r--  doc/git-annex-addurl.mdwn | 15
-rw-r--r--  doc/git-annex-importfeed.mdwn | 24
-rw-r--r--  doc/git-annex.mdwn | 12
-rw-r--r--  doc/tips/downloading_podcasts.mdwn | 17
-rw-r--r--  doc/tips/using_the_web_as_a_special_remote.mdwn | 37
-rw-r--r--  doc/todo/switch_from_quvi_to_youtube-dl.mdwn | 43
-rw-r--r--  git-annex.cabal | 5
74 files changed, 795 insertions, 592 deletions
diff --git a/Annex.hs b/Annex.hs
index 32a303239..427c479d8 100644
--- a/Annex.hs
+++ b/Annex.hs
@@ -64,7 +64,6 @@ import Types.LockCache
import Types.DesktopNotify
import Types.CleanupActions
import qualified Database.Keys.Handle as Keys
-import Utility.Quvi (QuviVersion)
import Utility.InodeCache
import Utility.Url
@@ -134,7 +133,6 @@ data AnnexState = AnnexState
, errcounter :: Integer
, unusedkeys :: Maybe (S.Set Key)
, tempurls :: M.Map Key URLString
- , quviversion :: Maybe QuviVersion
, existinghooks :: M.Map Git.Hook.Hook Bool
, desktopnotify :: DesktopNotify
, workers :: [Either AnnexState (Async AnnexState)]
@@ -190,7 +188,6 @@ newState c r = do
, errcounter = 0
, unusedkeys = Nothing
, tempurls = M.empty
- , quviversion = Nothing
, existinghooks = M.empty
, desktopnotify = mempty
, workers = []
diff --git a/Annex/Content.hs b/Annex/Content.hs
index 9661f068a..986f673f6 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -21,6 +21,7 @@ module Annex.Content (
prepTmp,
withTmp,
checkDiskSpace,
+ needMoreDiskSpace,
moveAnnex,
populatePointerFile,
linkToAnnex,
@@ -42,11 +43,13 @@ module Annex.Content (
dirKeys,
withObjectLoc,
staleKeysPrune,
+ pruneTmpWorkDirBefore,
isUnmodified,
verifyKeyContent,
VerifyConfig(..),
Verification(..),
unVerified,
+ withTmpWorkDir,
) where
import System.IO.Unsafe (unsafeInterleaveIO)
@@ -303,7 +306,7 @@ getViaTmp' v key action = do
(ok, verification) <- action tmpfile
if ok
then ifM (verifyKeyContent v verification key tmpfile)
- ( ifM (moveAnnex key tmpfile)
+ ( ifM (pruneTmpWorkDirBefore tmpfile (moveAnnex key))
( do
logStatus key InfoPresent
return True
@@ -311,7 +314,7 @@ getViaTmp' v key action = do
)
, do
warning "verification of content failed"
- liftIO $ nukeFile tmpfile
+ pruneTmpWorkDirBefore tmpfile (liftIO . nukeFile)
return False
)
-- On transfer failure, the tmp file is left behind, in case
@@ -386,7 +389,7 @@ prepTmp key = do
createAnnexDirectory (parentDir tmp)
return tmp
-{- Creates a temp file for a key, runs an action on it, and cleans up
+{- Prepares a temp file for a key, runs an action on it, and cleans up
- the temp file. If the action throws an exception, the temp file is
- left behind, which allows for resuming.
-}
@@ -394,7 +397,7 @@ withTmp :: Key -> (FilePath -> Annex a) -> Annex a
withTmp key action = do
tmp <- prepTmp key
res <- action tmp
- liftIO $ nukeFile tmp
+ pruneTmpWorkDirBefore tmp (liftIO . nukeFile)
return res
{- Checks that there is disk space available to store a given key,
@@ -429,16 +432,17 @@ checkDiskSpace' need destdir key alreadythere samefilesystem = ifM (Annex.getSta
let delta = need + reserve - have - alreadythere + inprogress
let ok = delta <= 0
unless ok $
- needmorespace delta
+ warning $ needMoreDiskSpace delta
return ok
_ -> return True
)
where
dir = maybe (fromRepo gitAnnexDir) return destdir
- needmorespace n =
- warning $ "not enough free space, need " ++
- roughSize storageUnits True n ++
- " more" ++ forcemsg
+
+needMoreDiskSpace :: Integer -> String
+needMoreDiskSpace n = "not enough free space, need " ++
+ roughSize storageUnits True n ++ " more" ++ forcemsg
+ where
forcemsg = " (use --force to override this check or adjust annex.diskreserve)"
{- Moves a key's content into .git/annex/objects/
@@ -989,7 +993,8 @@ staleKeysPrune dirspec nottransferred = do
let stale = contents `exclude` dups
dir <- fromRepo dirspec
- liftIO $ forM_ dups $ \t -> removeFile $ dir </> keyFile t
+ forM_ dups $ \k ->
+ pruneTmpWorkDirBefore (dir </> keyFile k) (liftIO . removeFile)
if nottransferred
then do
@@ -998,6 +1003,43 @@ staleKeysPrune dirspec nottransferred = do
return $ filter (`S.notMember` inprogress) stale
else return stale
+{- Prune the work dir associated with the specified content file,
+ - before performing an action that deletes the file, or moves it away.
+ -
+ - This preserves the invariant that the workdir never exists without
+ - the content file.
+ -}
+pruneTmpWorkDirBefore :: FilePath -> (FilePath -> Annex a) -> Annex a
+pruneTmpWorkDirBefore f action = do
+ let workdir = gitAnnexTmpWorkDir f
+ liftIO $ whenM (doesDirectoryExist workdir) $
+ removeDirectoryRecursive workdir
+ action f
+
+{- Runs an action, passing it a temporary work directory where
+ - it can write files while receiving the content of a key.
+ -
+ - On exception, or when the action returns Nothing,
+ - the temporary work directory is left, so resumes can use it.
+ -}
+withTmpWorkDir :: Key -> (FilePath -> Annex (Maybe a)) -> Annex (Maybe a)
+withTmpWorkDir key action = do
+ -- Create the object file if it does not exist. This way,
+ -- staleKeysPrune only has to look for object files, and can
+ -- clean up gitAnnexTmpWorkDir for those it finds.
+ obj <- prepTmp key
+ unlessM (liftIO $ doesFileExist obj) $ do
+ liftIO $ writeFile obj ""
+ setAnnexFilePerm obj
+ let tmpdir = gitAnnexTmpWorkDir obj
+ liftIO $ createDirectoryIfMissing True tmpdir
+ setAnnexDirPerm tmpdir
+ res <- action tmpdir
+ case res of
+ Just _ -> liftIO $ removeDirectoryRecursive tmpdir
+ Nothing -> noop
+ return res
+
{- Finds items in the first, smaller list, that are not
- present in the second, larger list.
-
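Not part of the patch, just an illustration: the two helpers added above are meant to be paired. Content is received inside withTmpWorkDir, and any code path that later deletes or moves the object file goes through pruneTmpWorkDirBefore, so a work directory never outlives its content file. A minimal sketch, using only functions already in scope in Annex.Content:

    -- sketch: delete an annex object file, pruning its work dir first
    discardObject :: FilePath -> Annex ()
    discardObject obj = pruneTmpWorkDirBefore obj (liftIO . nukeFile)
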
diff --git a/Annex/Locations.hs b/Annex/Locations.hs
index f86dfc6f4..bb45d8a3e 100644
--- a/Annex/Locations.hs
+++ b/Annex/Locations.hs
@@ -1,6 +1,6 @@
{- git-annex file locations
-
- - Copyright 2010-2015 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -27,6 +27,7 @@ module Annex.Locations (
gitAnnexTmpMiscDir,
gitAnnexTmpObjectDir,
gitAnnexTmpObjectLocation,
+ gitAnnexTmpWorkDir,
gitAnnexBadDir,
gitAnnexBadLocation,
gitAnnexUnusedLog,
@@ -251,6 +252,19 @@ gitAnnexTmpObjectDir r = addTrailingPathSeparator $ gitAnnexDir r </> "tmp"
gitAnnexTmpObjectLocation :: Key -> Git.Repo -> FilePath
gitAnnexTmpObjectLocation key r = gitAnnexTmpObjectDir r </> keyFile key
+{- Given a temp file such as gitAnnexTmpObjectLocation, makes a name for a
+ - subdirectory in the same location, that can be used as a work area
+ - when receiving the key's content.
+ -
+ - There are ordering requirements for creating these directories;
+ - use Annex.Content.withTmpWorkDir to set them up.
+ -}
+gitAnnexTmpWorkDir :: FilePath -> FilePath
+gitAnnexTmpWorkDir p =
+ let (dir, f) = splitFileName p
+ -- Using a prefix avoids name conflict with any other keys.
+ in dir </> "work." ++ f
+
{- .git/annex/bad/ is used for bad files found during fsck -}
gitAnnexBadDir :: Git.Repo -> FilePath
gitAnnexBadDir r = addTrailingPathSeparator $ gitAnnexDir r </> "bad"
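Not part of the patch: a concrete illustration of the naming scheme used by gitAnnexTmpWorkDir, with a hypothetical key name.

    -- sketch: the work dir is a sibling of the temp object file,
    -- prefixed with "work." to avoid clashing with other keys
    workDirExample :: FilePath
    workDirExample = gitAnnexTmpWorkDir ".git/annex/tmp/SHA256E-s1234--deadbeef"
    -- == ".git/annex/tmp/work.SHA256E-s1234--deadbeef"
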
diff --git a/Annex/Notification.hs b/Annex/Notification.hs
index 0501c0db7..f3d5006e3 100644
--- a/Annex/Notification.hs
+++ b/Annex/Notification.hs
@@ -5,12 +5,14 @@
- Licensed under the GNU GPL version 3 or higher.
-}
+{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-}
{-# LANGUAGE CPP #-}
module Annex.Notification (NotifyWitness, noNotification, notifyTransfer, notifyDrop) where
import Annex.Common
import Types.Transfer
+import Utility.Url
#ifdef WITH_DBUS_NOTIFICATIONS
import qualified Annex
import Types.DesktopNotify
@@ -25,29 +27,40 @@ data NotifyWitness = NotifyWitness
noNotification :: NotifyWitness
noNotification = NotifyWitness
+class Transferrable t where
+ descTransfrerrable :: t -> Maybe String
+
+instance Transferrable AssociatedFile where
+ descTransfrerrable (AssociatedFile af) = af
+
+instance Transferrable URLString where
+ descTransfrerrable = Just
+
{- Wrap around an action that performs a transfer, which may run multiple
- attempts. Displays notification when supported and when the user asked
- for it. -}
-notifyTransfer :: Direction -> AssociatedFile -> (NotifyWitness -> Annex Bool) -> Annex Bool
-notifyTransfer _ (AssociatedFile Nothing) a = a NotifyWitness
+notifyTransfer :: Transferrable t => Observable v => Direction -> t -> (NotifyWitness -> Annex v) -> Annex v
+notifyTransfer direction t a = case descTransfrerrable t of
+ Nothing -> a NotifyWitness
+ Just desc -> do
#ifdef WITH_DBUS_NOTIFICATIONS
-notifyTransfer direction (AssociatedFile (Just f)) a = do
- wanted <- Annex.getState Annex.desktopnotify
- if (notifyStart wanted || notifyFinish wanted)
- then do
- client <- liftIO DBus.Client.connectSession
- startnotification <- liftIO $ if notifyStart wanted
- then Just <$> Notify.notify client (startedTransferNote direction f)
- else pure Nothing
- ok <- a NotifyWitness
- when (notifyFinish wanted) $ liftIO $ void $ maybe
- (Notify.notify client $ finishedTransferNote ok direction f)
- (\n -> Notify.replace client n $ finishedTransferNote ok direction f)
- startnotification
- return ok
- else a NotifyWitness
+ wanted <- Annex.getState Annex.desktopnotify
+ if (notifyStart wanted || notifyFinish wanted)
+ then do
+ client <- liftIO DBus.Client.connectSession
+ startnotification <- liftIO $ if notifyStart wanted
+ then Just <$> Notify.notify client (startedTransferNote direction desc)
+ else pure Nothing
+ res <- a NotifyWitness
+ let ok = observeBool res
+ when (notifyFinish wanted) $ liftIO $ void $ maybe
+ (Notify.notify client $ finishedTransferNote ok direction desc)
+ (\n -> Notify.replace client n $ finishedTransferNote ok direction desc)
+ startnotification
+ return res
+ else a NotifyWitness
#else
-notifyTransfer _ (AssociatedFile (Just _)) a = a NotifyWitness
+ a NotifyWitness
#endif
notifyDrop :: AssociatedFile -> Bool -> Annex ()
@@ -63,13 +76,13 @@ notifyDrop (AssociatedFile (Just _)) _ = noop
#endif
#ifdef WITH_DBUS_NOTIFICATIONS
-startedTransferNote :: Direction -> FilePath -> Notify.Note
+startedTransferNote :: Direction -> String -> Notify.Note
startedTransferNote Upload = mkNote Notify.Transfer Notify.Low iconUpload
"Uploading"
startedTransferNote Download = mkNote Notify.Transfer Notify.Low iconDownload
"Downloading"
-finishedTransferNote :: Bool -> Direction -> FilePath -> Notify.Note
+finishedTransferNote :: Bool -> Direction -> String -> Notify.Note
finishedTransferNote False Upload = mkNote Notify.TransferError Notify.Normal iconFailure
"Failed to upload"
finishedTransferNote False Download = mkNote Notify.TransferError Notify.Normal iconFailure
@@ -79,7 +92,7 @@ finishedTransferNote True Upload = mkNote Notify.TransferComplete Notify.Low
finishedTransferNote True Download = mkNote Notify.TransferComplete Notify.Low iconSuccess
"Finished downloading"
-droppedNote :: Bool -> FilePath -> Notify.Note
+droppedNote :: Bool -> String -> Notify.Note
droppedNote False = mkNote Notify.TransferError Notify.Normal iconFailure
"Failed to drop"
droppedNote True = mkNote Notify.TransferComplete Notify.Low iconSuccess
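Not part of the patch: with the Transferrable instances above, notifyTransfer now accepts either an AssociatedFile or a plain URLString, which is what the youtube-dl download code later in this patch relies on. A minimal sketch, assuming Download and URLString are in scope from Types.Transfer and Utility.Url:

    -- sketch: run a download action with desktop notifications keyed on the url
    notifiedDownload :: URLString -> Annex Bool -> Annex Bool
    notifiedDownload url a = notifyTransfer Download url (\_witness -> a)
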
diff --git a/Annex/Quvi.hs b/Annex/Quvi.hs
deleted file mode 100644
index efc63ca9f..000000000
--- a/Annex/Quvi.hs
+++ /dev/null
@@ -1,33 +0,0 @@
-{- quvi options for git-annex
- -
- - Copyright 2013 Joey Hess <id@joeyh.name>
- -
- - Licensed under the GNU GPL version 3 or higher.
- -}
-
-{-# LANGUAGE Rank2Types #-}
-
-module Annex.Quvi where
-
-import Annex.Common
-import qualified Annex
-import Utility.Quvi
-import Utility.Url
-
-withQuviOptions :: forall a. Query a -> [QuviParams] -> URLString -> Annex a
-withQuviOptions a ps url = do
- v <- quviVersion
- opts <- map Param . annexQuviOptions <$> Annex.getGitConfig
- liftIO $ a v (concatMap (\mkp -> mkp v) ps ++ opts) url
-
-quviSupported :: URLString -> Annex Bool
-quviSupported u = liftIO . flip supported u =<< quviVersion
-
-quviVersion :: Annex QuviVersion
-quviVersion = go =<< Annex.getState Annex.quviversion
- where
- go (Just v) = return v
- go Nothing = do
- v <- liftIO probeVersion
- Annex.changeState $ \s -> s { Annex.quviversion = Just v }
- return v
diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs
index 3fcf1a1b9..ccb5409a7 100644
--- a/Annex/Transfer.hs
+++ b/Annex/Transfer.hs
@@ -5,7 +5,7 @@
- Licensed under the GNU GPL version 3 or higher.
-}
-{-# LANGUAGE CPP, FlexibleInstances, BangPatterns #-}
+{-# LANGUAGE CPP, BangPatterns #-}
module Annex.Transfer (
module X,
@@ -27,7 +27,6 @@ import Annex.Perms
import Utility.Metered
import Annex.LockPool
import Types.Key
-import Types.Remote (Verification(..))
import qualified Types.Remote as Remote
import Types.Concurrency
@@ -35,23 +34,6 @@ import Control.Concurrent
import qualified Data.Map.Strict as M
import Data.Ord
-class Observable a where
- observeBool :: a -> Bool
- observeFailure :: a
-
-instance Observable Bool where
- observeBool = id
- observeFailure = False
-
-instance Observable (Bool, Verification) where
- observeBool = fst
- observeFailure = (False, UnVerified)
-
-instance Observable (Either e Bool) where
- observeBool (Left _) = False
- observeBool (Right b) = b
- observeFailure = Right False
-
upload :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
upload u key f d a _witness = guardHaveUUID u $
runTransfer (Transfer Upload u key) f d a
diff --git a/Annex/YoutubeDl.hs b/Annex/YoutubeDl.hs
new file mode 100644
index 000000000..d3803075d
--- /dev/null
+++ b/Annex/YoutubeDl.hs
@@ -0,0 +1,142 @@
+{- youtube-dl integration for git-annex
+ -
+ - Copyright 2017 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.YoutubeDl where
+
+import Annex.Common
+import qualified Annex
+import Annex.Content
+import Utility.Url (URLString)
+import Utility.DiskFree
+import Logs.Transfer
+
+-- Runs youtube-dl in a work directory, to download a single media file
+-- from the url. Returns the path to the media file in the work directory.
+--
+-- If youtube-dl fails without writing any files to the work directory,
+-- or is not installed, returns Right Nothing.
+--
+-- The work directory can contain files from a previous run of youtube-dl
+-- and it will resume. It should not contain any other files though,
+-- and youtube-dl needs to finish up with only one file in the directory
+-- so we know which one it downloaded.
+--
+-- (Note that we can't use --output to specify the file to download to,
+-- due to <https://github.com/rg3/youtube-dl/issues/14864>)
+youtubeDl :: URLString -> FilePath -> Annex (Either String (Maybe FilePath))
+youtubeDl url workdir = ifM (liftIO $ inPath "youtube-dl")
+ ( runcmd >>= \case
+ Right True -> do
+ fs <- liftIO $ filterM (doesFileExist) =<< dirContents workdir
+ case fs of
+ (f:[]) -> return (Right (Just f))
+ [] -> return nofiles
+ _ -> return (toomanyfiles fs)
+ Right False -> do
+ fs <- liftIO $ filterM (doesFileExist) =<< dirContents workdir
+ if null fs
+ then return (Right Nothing)
+ else return (Left "youtube-dl download is incomplete. Run the command again to resume.")
+ Left msg -> return (Left msg)
+ , return (Right Nothing)
+ )
+ where
+ nofiles = Left "youtube-dl did not put any media in its work directory, perhaps it's been configured to store files somewhere else?"
+ toomanyfiles fs = Left $ "youtube-dl downloaded multiple media files; git-annex is only able to deal with one per url: " ++ show fs
+ runcmd = youtubeDlMaxSize workdir >>= \case
+ Left msg -> return (Left msg)
+ Right maxsize -> do
+ quiet <- commandProgressDisabled
+ opts <- youtubeDlOpts $ dlopts ++ maxsize ++
+ if quiet then [ Param "--quiet" ] else []
+ ok <- liftIO $ boolSystem' "youtube-dl" opts $
+ \p -> p { cwd = Just workdir }
+ return (Right ok)
+ dlopts =
+ [ Param url
+ -- To make youtube-dl only download one file when given a
+ -- page with a video and a playlist, download only the video.
+ , Param "--no-playlist"
+ -- And when given a page with only a playlist, download only
+ -- the first video on the playlist. (Assumes the video is
+ -- somewhat stable, but this is the only way to prevent
+ -- youtube-dl from downloading the whole playlist.)
+ , Param "--playlist-items", Param "0"
+ ]
+
+-- To honor annex.diskreserve, ask youtube-dl to not download too
+-- large a media file. Factors in other downloads that are in progress,
+-- and any files in the workdir that it may have partially downloaded
+-- before.
+youtubeDlMaxSize :: FilePath -> Annex (Either String [CommandParam])
+youtubeDlMaxSize workdir = ifM (Annex.getState Annex.force)
+ ( return $ Right []
+ , liftIO (getDiskFree workdir) >>= \case
+ Just have -> do
+ inprogress <- sizeOfDownloadsInProgress (const True)
+ partial <- liftIO $ sum
+ <$> (mapM getFileSize =<< dirContents workdir)
+ reserve <- annexDiskReserve <$> Annex.getGitConfig
+ let maxsize = have - reserve - inprogress + partial
+ if maxsize > 0
+ then return $ Right
+ [ Param "--max-filesize"
+ , Param (show maxsize)
+ ]
+ else return $ Left $
+ needMoreDiskSpace $
+ negate maxsize + 1024
+ Nothing -> return $ Right []
+ )
+
+-- Download a media file to a destination.
+youtubeDlTo :: Key -> URLString -> FilePath -> Annex Bool
+youtubeDlTo key url dest = do
+ res <- withTmpWorkDir key $ \workdir -> do
+ dl <- youtubeDl url workdir
+ case dl of
+ Right (Just mediafile) -> do
+ liftIO $ renameFile mediafile dest
+ return (Just True)
+ Right Nothing -> return (Just False)
+ Left msg -> do
+ warning msg
+ return Nothing
+ return (fromMaybe False res)
+
+youtubeDlSupported :: URLString -> Annex Bool
+youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url
+
+-- Check if youtube-dl can find media in an url.
+youtubeDlCheck :: URLString -> Annex (Either String Bool)
+youtubeDlCheck url = catchMsgIO $ do
+ opts <- youtubeDlOpts [ Param url, Param "--simulate" ]
+ liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing
+
+-- Ask youtube-dl for the filename of media in an url.
+--
+-- (This is not always identical to the filename it uses when downloading.)
+youtubeDlFileName :: URLString -> Annex (Either String FilePath)
+youtubeDlFileName url = flip catchIO (pure . Left . show) $ do
+ -- Sometimes youtube-dl will fail with an ugly backtrace
+ -- (eg, http://bugs.debian.org/874321)
+ -- so catch stderr as well as stdout to avoid the user seeing it.
+ -- --no-warnings avoids warning messages that are output to stdout.
+ opts <- youtubeDlOpts
+ [ Param url
+ , Param "--get-filename"
+ , Param "--no-warnings"
+ ]
+ (output, ok) <- liftIO $ processTranscript "youtube-dl" (toCommand opts) Nothing
+ return $ case (ok, lines output) of
+ (True, (f:_)) | not (null f) -> Right f
+ _ -> Left "no media in url"
+
+youtubeDlOpts :: [CommandParam] -> Annex [CommandParam]
+youtubeDlOpts addopts = do
+ opts <- map Param . annexYoutubeDlOptions <$> Annex.getGitConfig
+ return (opts ++ addopts)
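Not part of the patch: a rough sketch of how this module's entry points compose for a caller that already has a key and a destination path; error reporting is simplified.

    -- sketch: fetch media behind an url into dest, when youtube-dl supports it
    fetchMedia :: Key -> URLString -> FilePath -> Annex Bool
    fetchMedia key url dest = ifM (youtubeDlSupported url)
        ( youtubeDlTo key url dest
        , return False
        )
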
diff --git a/BuildInfo.hs b/BuildInfo.hs
index 47dacc78b..79253ee4f 100644
--- a/BuildInfo.hs
+++ b/BuildInfo.hs
@@ -81,7 +81,6 @@ buildFlags = filter (not . null)
-- Always enabled now, but users may be used to seeing these flags
-- listed.
, "Feeds"
- , "Quvi"
]
-- Not a complete list, let alone a listing transitive deps, but only
diff --git a/CHANGELOG b/CHANGELOG
index 5d40b0ce1..090a41797 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,17 @@
+git-annex (6.20171125) UNRELEASED; urgency=medium
+
+ * Use youtube-dl rather than quvi to download media from web pages,
+ since quvi is not being actively developed and youtube-dl supports
+ many more sites.
+ * addurl --relaxed got slower, since youtube-dl has to hit the network
+ to check for embedded media. If you relied on --relaxed not hitting the
+ network for speed reasons, using --relaxed --raw will get the old level
+ of speed, but can't be used for urls with embedded videos.
+ * importfeed now downloads things linked to by feeds, even when they are
+ not media files.
+
+ -- Joey Hess <id@joeyh.name> Tue, 28 Nov 2017 13:48:44 -0400
+
git-annex (6.20171124) unstable; urgency=medium
* Display progress meter when uploading a key without size information,
diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs
index 866bfc463..0e937dc69 100644
--- a/Command/AddUrl.hs
+++ b/Command/AddUrl.hs
@@ -1,6 +1,6 @@
{- git-annex command
-
- - Copyright 2011-2014 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -21,6 +21,7 @@ import Annex.Content
import Annex.Ingest
import Annex.CheckIgnore
import Annex.UUID
+import Annex.YoutubeDl
import Logs.Web
import Types.KeySource
import Types.UrlContents
@@ -28,9 +29,8 @@ import Annex.FileMatcher
import Logs.Location
import Utility.Metered
import Utility.FileSystemEncoding
+import Utility.HtmlDetect
import qualified Annex.Transfer as Transfer
-import Annex.Quvi
-import qualified Utility.Quvi as Quvi
cmd :: Command
cmd = notBareRepo $ withGlobalOptions [jobsOption, jsonOption, jsonProgressOption] $
@@ -39,23 +39,23 @@ cmd = notBareRepo $ withGlobalOptions [jobsOption, jsonOption, jsonProgressOptio
data AddUrlOptions = AddUrlOptions
{ addUrls :: CmdParams
- , fileOption :: Maybe FilePath
, pathdepthOption :: Maybe Int
, prefixOption :: Maybe String
, suffixOption :: Maybe String
- , relaxedOption :: Bool
- , rawOption :: Bool
+ , downloadOptions :: DownloadOptions
, batchOption :: BatchMode
, batchFilesOption :: Bool
}
+data DownloadOptions = DownloadOptions
+ { relaxedOption :: Bool
+ , rawOption :: Bool
+ , fileOption :: Maybe FilePath
+ }
+
optParser :: CmdParamsDesc -> Parser AddUrlOptions
optParser desc = AddUrlOptions
<$> cmdParams desc
- <*> optional (strOption
- ( long "file" <> metavar paramFile
- <> help "specify what file the url is added to"
- ))
<*> optional (option auto
( long "pathdepth" <> metavar paramNumber
<> help "number of url path components to use in filename"
@@ -68,25 +68,29 @@ optParser desc = AddUrlOptions
( long "suffix" <> metavar paramValue
<> help "add a suffix to the filename"
))
- <*> parseRelaxedOption
- <*> parseRawOption
+ <*> parseDownloadOptions True
<*> parseBatchOption
<*> switch
( long "with-files"
<> help "parse batch mode lines of the form \"$url $file\""
)
-parseRelaxedOption :: Parser Bool
-parseRelaxedOption = switch
- ( long "relaxed"
- <> help "skip size check"
- )
-
-parseRawOption :: Parser Bool
-parseRawOption = switch
- ( long "raw"
- <> help "disable special handling for torrents, quvi, etc"
- )
+parseDownloadOptions :: Bool -> Parser DownloadOptions
+parseDownloadOptions withfileoption = DownloadOptions
+ <$> switch
+ ( long "relaxed"
+ <> help "skip size check"
+ )
+ <*> switch
+ ( long "raw"
+ <> help "disable special handling for torrents, youtube-dl, etc"
+ )
+ <*> if withfileoption
+ then optional (strOption
+ ( long "file" <> metavar paramFile
+ <> help "specify what file the url is added to"
+ ))
+ else pure Nothing
seek :: AddUrlOptions -> CommandSeek
seek o = allowConcurrentOutput $ do
@@ -97,7 +101,7 @@ seek o = allowConcurrentOutput $ do
where
go (o', u) = do
r <- Remote.claimingUrl u
- if Remote.uuid r == webUUID || rawOption o'
+ if Remote.uuid r == webUUID || rawOption (downloadOptions o')
then void $ commandAction $ startWeb o' u
else checkUrl r o' u
@@ -107,13 +111,13 @@ parseBatchInput o s
let (u, f) = separate (== ' ') s
in if null u || null f
then Left ("parsed empty url or filename in input: " ++ s)
- else Right (o { fileOption = Just f }, u)
+ else Right (o { downloadOptions = (downloadOptions o) { fileOption = Just f } }, u)
| otherwise = Right (o, s)
checkUrl :: Remote -> AddUrlOptions -> URLString -> Annex ()
checkUrl r o u = do
pathmax <- liftIO $ fileNameLengthLimit "."
- let deffile = fromMaybe (urlString2file u (pathdepthOption o) pathmax) (fileOption o)
+ let deffile = fromMaybe (urlString2file u (pathdepthOption o) pathmax) (fileOption (downloadOptions o))
go deffile =<< maybe
(error $ "unable to checkUrl of " ++ Remote.name r)
(tryNonAsync . flip id u)
@@ -121,50 +125,50 @@ checkUrl r o u = do
where
go _ (Left e) = void $ commandAction $ do
- showStart "addurl" u
+ showStart' "addurl" (Just u)
warning (show e)
next $ next $ return False
go deffile (Right (UrlContents sz mf)) = do
- let f = adjustFile o (fromMaybe (maybe deffile fromSafeFilePath mf) (fileOption o))
- void $ commandAction $
- startRemote r (relaxedOption o) f u sz
+ let f = adjustFile o (fromMaybe (maybe deffile fromSafeFilePath mf) (fileOption (downloadOptions o)))
+ void $ commandAction $ startRemote r o f u sz
go deffile (Right (UrlMulti l))
- | isNothing (fileOption o) =
+ | isNothing (fileOption (downloadOptions o)) =
forM_ l $ \(u', sz, f) -> do
let f' = adjustFile o (deffile </> fromSafeFilePath f)
void $ commandAction $
- startRemote r (relaxedOption o) f' u' sz
+ startRemote r o f' u' sz
| otherwise = giveup $ unwords
[ "That url contains multiple files according to the"
, Remote.name r
, " remote; cannot add it to a single file."
]
-startRemote :: Remote -> Bool -> FilePath -> URLString -> Maybe Integer -> CommandStart
-startRemote r relaxed file uri sz = do
+startRemote :: Remote -> AddUrlOptions -> FilePath -> URLString -> Maybe Integer -> CommandStart
+startRemote r o file uri sz = do
pathmax <- liftIO $ fileNameLengthLimit "."
let file' = joinPath $ map (truncateFilePath pathmax) $ splitDirectories file
- showStart "addurl" file'
+ showStart' "addurl" (Just uri)
showNote $ "from " ++ Remote.name r
- next $ performRemote r relaxed uri file' sz
+ showDestinationFile file'
+ next $ performRemote r o uri file' sz
-performRemote :: Remote -> Bool -> URLString -> FilePath -> Maybe Integer -> CommandPerform
-performRemote r relaxed uri file sz = ifAnnexed file adduri geturi
+performRemote :: Remote -> AddUrlOptions -> URLString -> FilePath -> Maybe Integer -> CommandPerform
+performRemote r o uri file sz = ifAnnexed file adduri geturi
where
loguri = setDownloader uri OtherDownloader
- adduri = addUrlChecked relaxed loguri (Remote.uuid r) checkexistssize
+ adduri = addUrlChecked o loguri file (Remote.uuid r) checkexistssize
checkexistssize key = return $ case sz of
- Nothing -> (True, True)
- Just n -> (True, n == fromMaybe n (keySize key))
- geturi = next $ isJust <$> downloadRemoteFile r relaxed uri file sz
+ Nothing -> (True, True, uri)
+ Just n -> (True, n == fromMaybe n (keySize key), uri)
+ geturi = next $ isJust <$> downloadRemoteFile r (downloadOptions o) uri file sz
-downloadRemoteFile :: Remote -> Bool -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key)
-downloadRemoteFile r relaxed uri file sz = checkCanAdd file $ do
+downloadRemoteFile :: Remote -> DownloadOptions -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key)
+downloadRemoteFile r o uri file sz = checkCanAdd file $ do
let urlkey = Backend.URL.fromUrl uri sz
liftIO $ createDirectoryIfMissing True (parentDir file)
- ifM (Annex.getState Annex.fast <||> pure relaxed)
+ ifM (Annex.getState Annex.fast <||> pure (relaxedOption o))
( do
- cleanup (Remote.uuid r) loguri file urlkey Nothing
+ addWorkTree (Remote.uuid r) loguri file urlkey Nothing
return (Just urlkey)
, do
-- Set temporary url for the urlkey
@@ -181,24 +185,18 @@ downloadRemoteFile r relaxed uri file sz = checkCanAdd file $ do
where
loguri = setDownloader uri OtherDownloader
-startWeb :: AddUrlOptions -> String -> CommandStart
-startWeb o s = go $ fromMaybe bad $ parseURI urlstring
+startWeb :: AddUrlOptions -> URLString -> CommandStart
+startWeb o urlstring = go $ fromMaybe bad $ parseURI urlstring
where
- (urlstring, downloader) = getDownloader s
bad = fromMaybe (giveup $ "bad url " ++ urlstring) $
Url.parseURIRelaxed $ urlstring
- go url = case downloader of
- QuviDownloader -> usequvi
- _ -> ifM (quviSupported urlstring)
- ( usequvi
- , regulardownload url
- )
- regulardownload url = do
+ go url = do
+ showStart' "addurl" (Just urlstring)
pathmax <- liftIO $ fileNameLengthLimit "."
- urlinfo <- if relaxedOption o
+ urlinfo <- if relaxedOption (downloadOptions o)
then pure Url.assumeUrlExists
else Url.withUrlOptions (Url.getUrlInfo urlstring)
- file <- adjustFile o <$> case fileOption o of
+ file <- adjustFile o <$> case fileOption (downloadOptions o) of
Just f -> pure f
Nothing -> case Url.urlSuggestedFile urlinfo of
Nothing -> pure $ url2file url (pathdepthOption o) pathmax
@@ -209,79 +207,31 @@ startWeb o s = go $ fromMaybe bad $ parseURI urlstring
( pure $ url2file url (pathdepthOption o) pathmax
, pure f
)
- showStart "addurl" file
- next $ performWeb (relaxedOption o) urlstring file urlinfo
- badquvi = giveup $ "quvi does not know how to download url " ++ urlstring
- usequvi = do
- page <- fromMaybe badquvi
- <$> withQuviOptions Quvi.forceQuery [Quvi.quiet, Quvi.httponly] urlstring
- let link = fromMaybe badquvi $ headMaybe $ Quvi.pageLinks page
- pathmax <- liftIO $ fileNameLengthLimit "."
- let file = adjustFile o $ flip fromMaybe (fileOption o) $
- truncateFilePath pathmax $ sanitizeFilePath $
- Quvi.pageTitle page ++ "." ++ fromMaybe "m" (Quvi.linkSuffix link)
- showStart "addurl" file
- next $ performQuvi (relaxedOption o) urlstring (Quvi.linkUrl link) file
-
-performWeb :: Bool -> URLString -> FilePath -> Url.UrlInfo -> CommandPerform
-performWeb relaxed url file urlinfo = ifAnnexed file addurl geturl
- where
- geturl = next $ isJust <$> addUrlFile relaxed url urlinfo file
- addurl = addUrlChecked relaxed url webUUID $ \k -> return $
- (Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k)
+ next $ performWeb o urlstring file urlinfo
-performQuvi :: Bool -> URLString -> URLString -> FilePath -> CommandPerform
-performQuvi relaxed pageurl videourl file = ifAnnexed file addurl geturl
- where
- quviurl = setDownloader pageurl QuviDownloader
- addurl key = next $ do
- cleanup webUUID quviurl file key Nothing
- return True
- geturl = next $ isJust <$> addUrlFileQuvi relaxed quviurl videourl file
-
-addUrlFileQuvi :: Bool -> URLString -> URLString -> FilePath -> Annex (Maybe Key)
-addUrlFileQuvi relaxed quviurl videourl file = checkCanAdd file $ do
- let key = Backend.URL.fromUrl quviurl Nothing
- ifM (pure relaxed <||> Annex.getState Annex.fast)
- ( do
- cleanup webUUID quviurl file key Nothing
- return (Just key)
- , do
- {- Get the size, and use that to check
- - disk space. However, the size info is not
- - retained, because the size of a video stream
- - might change and we want to be able to download
- - it later. -}
- urlinfo <- Url.withUrlOptions (Url.getUrlInfo videourl)
- let sizedkey = addSizeUrlKey urlinfo key
- checkDiskSpaceToGet sizedkey Nothing $ do
- tmp <- fromRepo $ gitAnnexTmpObjectLocation key
- showOutput
- ok <- Transfer.notifyTransfer Transfer.Download afile $
- Transfer.download webUUID key afile Transfer.forwardRetry $ \p -> do
- liftIO $ createDirectoryIfMissing True (parentDir tmp)
- downloadUrl key p [videourl] tmp
- if ok
- then do
- cleanup webUUID quviurl file key (Just tmp)
- return (Just key)
- else return Nothing
- )
+performWeb :: AddUrlOptions -> URLString -> FilePath -> Url.UrlInfo -> CommandPerform
+performWeb o url file urlinfo = ifAnnexed file addurl geturl
where
- afile = AssociatedFile (Just file)
+ geturl = next $ isJust <$> addUrlFile (downloadOptions o) url urlinfo file
+ addurl = addUrlChecked o url file webUUID $ \k ->
+ ifM (pure (not (rawOption (downloadOptions o))) <&&> youtubeDlSupported url)
+ ( return (True, True, setDownloader url YoutubeDownloader)
+ , return (Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k, url)
+ )
-addUrlChecked :: Bool -> URLString -> UUID -> (Key -> Annex (Bool, Bool)) -> Key -> CommandPerform
-addUrlChecked relaxed url u checkexistssize key
- | relaxed = do
- setUrlPresent u key url
- next $ return True
- | otherwise = ifM ((elem url <$> getUrls key) <&&> (elem u <$> loggedLocations key))
- ( next $ return True -- nothing to do
+{- Check that the url exists, and has the same size as the key,
+ - and add it as an url to the key. -}
+addUrlChecked :: AddUrlOptions -> URLString -> FilePath -> UUID -> (Key -> Annex (Bool, Bool, URLString)) -> Key -> CommandPerform
+addUrlChecked o url file u checkexistssize key =
+ ifM ((elem url <$> getUrls key) <&&> (elem u <$> loggedLocations key))
+ ( do
+ showDestinationFile file
+ next $ return True
, do
- (exists, samesize) <- checkexistssize key
- if exists && samesize
+ (exists, samesize, url') <- checkexistssize key
+ if exists && (samesize || relaxedOption (downloadOptions o))
then do
- setUrlPresent u key url
+ setUrlPresent u key url'
next $ return True
else do
warning $ "while adding a new url to an already annexed file, " ++ if exists
@@ -290,64 +240,122 @@ addUrlChecked relaxed url u checkexistssize key
stop
)
-addUrlFile :: Bool -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
-addUrlFile relaxed url urlinfo file = checkCanAdd file $ do
- liftIO $ createDirectoryIfMissing True (parentDir file)
- ifM (Annex.getState Annex.fast <||> pure relaxed)
- ( nodownload url urlinfo file
- , downloadWeb url urlinfo file
+{- Downloads an url (except in fast or relaxed mode) and adds it to the
+ - repository, normally at the specified FilePath.
+ - But, if youtube-dl supports the url, it will be written to a
+ - different file, based on the title of the media. Unless the user
+ - specified fileOption, which then forces using the FilePath.
+ -}
+addUrlFile :: DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
+addUrlFile o url urlinfo file =
+ ifM (Annex.getState Annex.fast <||> pure (relaxedOption o))
+ ( nodownloadWeb o url urlinfo file
+ , downloadWeb o url urlinfo file
)
-downloadWeb :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
-downloadWeb url urlinfo file = do
- let dummykey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing
- let downloader f p = do
+downloadWeb :: DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
+downloadWeb o url urlinfo file =
+ go =<< downloadWith' downloader urlkey webUUID url (AssociatedFile (Just file))
+ where
+ urlkey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing
+ downloader f p = do
showOutput
- downloadUrl dummykey p [url] f
- showAction $ "downloading " ++ url ++ " "
- downloadWith downloader dummykey webUUID url file
+ downloadUrl urlkey p [url] f
+ go Nothing = return Nothing
+ -- If we downloaded a html file, try to use youtube-dl to
+ -- extract embedded media.
+ go (Just tmp) = ifM (pure (not (rawOption o)) <&&> liftIO (isHtml <$> readFile tmp))
+ ( tryyoutubedl tmp
+ , normalfinish tmp
+ )
+ normalfinish tmp = checkCanAdd file $ do
+ showDestinationFile file
+ liftIO $ createDirectoryIfMissing True (parentDir file)
+ finishDownloadWith tmp webUUID url file
+ tryyoutubedl tmp = withTmpWorkDir mediakey $ \workdir ->
+ Transfer.notifyTransfer Transfer.Download url $
+ Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p -> do
+ dl <- youtubeDl url workdir
+ case dl of
+ Right (Just mediafile) -> do
+ pruneTmpWorkDirBefore tmp (liftIO . nukeFile)
+ let dest = if isJust (fileOption o)
+ then file
+ else takeFileName mediafile
+ checkCanAdd dest $ do
+ showDestinationFile dest
+ addWorkTree webUUID mediaurl dest mediakey (Just mediafile)
+ return $ Just mediakey
+ Right Nothing -> normalfinish tmp
+ Left msg -> do
+ warning msg
+ return Nothing
+ where
+ mediaurl = setDownloader url YoutubeDownloader
+ mediakey = Backend.URL.fromUrl mediaurl Nothing
+
+showDestinationFile :: FilePath -> Annex ()
+showDestinationFile file = do
+ showNote ("to " ++ file)
+ maybeShowJSON $ JSONChunk [("file", file)]
{- The Key should be a dummy key, based on the URL, which is used
- for this download, before we can examine the file and find its real key.
- For resuming downloads to work, the dummy key for a given url should be
- - stable. -}
+ - stable. For disk space checking to work, the dummy key should have
+ - the size of the url already set.
+ -
+ - Downloads the url, sets up the worktree file, and returns the
+ - real key.
+ -}
downloadWith :: (FilePath -> MeterUpdate -> Annex Bool) -> Key -> UUID -> URLString -> FilePath -> Annex (Maybe Key)
downloadWith downloader dummykey u url file =
- checkDiskSpaceToGet dummykey Nothing $ do
- tmp <- fromRepo $ gitAnnexTmpObjectLocation dummykey
- ifM (runtransfer tmp)
- ( do
- backend <- chooseBackend file
- let source = KeySource
- { keyFilename = file
- , contentLocation = tmp
- , inodeCache = Nothing
- }
- k <- genKey source backend
- case k of
- Nothing -> return Nothing
- Just (key, _) -> do
- cleanup u url file key (Just tmp)
- return (Just key)
- , return Nothing
- )
+ go =<< downloadWith' downloader dummykey u url afile
where
- runtransfer tmp = Transfer.notifyTransfer Transfer.Download afile $
- Transfer.download u dummykey afile Transfer.forwardRetry $ \p -> do
- liftIO $ createDirectoryIfMissing True (parentDir tmp)
- downloader tmp p
afile = AssociatedFile (Just file)
+ go Nothing = return Nothing
+ go (Just tmp) = finishDownloadWith tmp u url file
+
+{- Like downloadWith, but leaves the dummy key content in
+ - the returned location. -}
+downloadWith' :: (FilePath -> MeterUpdate -> Annex Bool) -> Key -> UUID -> URLString -> AssociatedFile -> Annex (Maybe FilePath)
+downloadWith' downloader dummykey u url afile =
+ checkDiskSpaceToGet dummykey Nothing $ do
+ tmp <- fromRepo $ gitAnnexTmpObjectLocation dummykey
+ ok <- Transfer.notifyTransfer Transfer.Download url $
+ Transfer.download u dummykey afile Transfer.forwardRetry $ \p -> do
+ liftIO $ createDirectoryIfMissing True (parentDir tmp)
+ downloader tmp p
+ if ok
+ then return (Just tmp)
+ else return Nothing
+
+finishDownloadWith :: FilePath -> UUID -> URLString -> FilePath -> Annex (Maybe Key)
+finishDownloadWith tmp u url file = do
+ backend <- chooseBackend file
+ let source = KeySource
+ { keyFilename = file
+ , contentLocation = tmp
+ , inodeCache = Nothing
+ }
+ k <- genKey source backend
+ case k of
+ Nothing -> return Nothing
+ Just (key, _) -> do
+ addWorkTree u url file key (Just tmp)
+ return (Just key)
{- Adds the url size to the Key. -}
addSizeUrlKey :: Url.UrlInfo -> Key -> Key
addSizeUrlKey urlinfo key = key { keySize = Url.urlSize urlinfo }
-cleanup :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex ()
-cleanup u url file key mtmp = case mtmp of
+{- Adds worktree file to the repository. -}
+addWorkTree :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex ()
+addWorkTree u url file key mtmp = case mtmp of
Nothing -> go
Just tmp -> do
-- Move to final location for large file check.
- liftIO $ renameFile tmp file
+ pruneTmpWorkDirBefore tmp (\_ -> liftIO $ renameFile tmp file)
largematcher <- largeFilesMatcher
large <- checkFileMatcher largematcher file
if large
@@ -366,18 +374,36 @@ cleanup u url file key mtmp = case mtmp of
( do
when (isJust mtmp) $
logStatus key InfoPresent
- , liftIO $ maybe noop nukeFile mtmp
+ , maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . nukeFile)) mtmp
)
-nodownload :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
-nodownload url urlinfo file
- | Url.urlExists urlinfo = do
- let key = Backend.URL.fromUrl url (Url.urlSize urlinfo)
- cleanup webUUID url file key Nothing
- return (Just key)
+nodownloadWeb :: DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
+nodownloadWeb o url urlinfo file
+ | Url.urlExists urlinfo = if rawOption o
+ then nomedia
+ else either (const nomedia) usemedia
+ =<< youtubeDlFileName url
| otherwise = do
warning $ "unable to access url: " ++ url
return Nothing
+ where
+ nomedia = do
+ let key = Backend.URL.fromUrl url (Url.urlSize urlinfo)
+ nodownloadWeb' url key file
+ usemedia mediafile = do
+ let dest = if isJust (fileOption o)
+ then file
+ else takeFileName mediafile
+ let mediaurl = setDownloader url YoutubeDownloader
+ let mediakey = Backend.URL.fromUrl mediaurl Nothing
+ nodownloadWeb' mediaurl mediakey dest
+
+nodownloadWeb' :: URLString -> Key -> FilePath -> Annex (Maybe Key)
+nodownloadWeb' url key file = checkCanAdd file $ do
+ showDestinationFile file
+ liftIO $ createDirectoryIfMissing True (parentDir file)
+ addWorkTree webUUID url file key Nothing
+ return (Just key)
url2file :: URI -> Maybe Int -> Int -> FilePath
url2file url pathdepth pathmax = case pathdepth of
@@ -411,7 +437,7 @@ adjustFile o = addprefix . addsuffix
checkCanAdd :: FilePath -> Annex (Maybe a) -> Annex (Maybe a)
checkCanAdd file a = ifM (isJust <$> (liftIO $ catchMaybeIO $ getSymbolicLinkStatus file))
( do
- warning $ file ++ " already exists and is not annexed; not overwriting"
+ warning $ file ++ " already exists; not overwriting"
return Nothing
, ifM ((not <$> Annex.getState Annex.force) <&&> checkIgnored file)
( do
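Not part of the patch: the refactored DownloadOptions record groups the flags that importfeed reuses from addurl. A sketch with hypothetical values:

    -- sketch: options equivalent to "addurl --raw --file video.mp4"
    exampleOpts :: DownloadOptions
    exampleOpts = DownloadOptions
        { relaxedOption = False
        , rawOption = True
        , fileOption = Just "video.mp4"
        }
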
diff --git a/Command/Adjust.hs b/Command/Adjust.hs
index 204fd057a..0fef3f936 100644
--- a/Command/Adjust.hs
+++ b/Command/Adjust.hs
@@ -38,5 +38,5 @@ seek = commandAction . start
start :: Adjustment -> CommandStart
start adj = do
checkVersionSupported
- showStart "adjust" ""
+ showStart' "adjust" Nothing
next $ next $ enterAdjustedBranch adj
diff --git a/Command/Config.hs b/Command/Config.hs
index 5da196044..47415999d 100644
--- a/Command/Config.hs
+++ b/Command/Config.hs
@@ -50,14 +50,14 @@ optParser _ = setconfig <|> getconfig <|> unsetconfig
seek :: Action -> CommandSeek
seek (SetConfig name val) = commandAction $ do
allowMessages
- showStart name val
+ showStart' name (Just val)
next $ next $ do
setGlobalConfig name val
setConfig (ConfigKey name) val
return True
seek (UnsetConfig name) = commandAction $ do
allowMessages
- showStart name "unset"
+ showStart' name (Just "unset")
next $ next $ do
unsetGlobalConfig name
unsetConfig (ConfigKey name)
diff --git a/Command/Dead.hs b/Command/Dead.hs
index 44cf7b7f6..385dd6fad 100644
--- a/Command/Dead.hs
+++ b/Command/Dead.hs
@@ -33,7 +33,7 @@ seek (DeadKeys ks) = seekActions $ pure $ map startKey ks
startKey :: Key -> CommandStart
startKey key = do
- showStart "dead" (key2file key)
+ showStart' "dead" (Just $ key2file key)
ls <- keyLocations key
case ls of
[] -> next $ performKey key
diff --git a/Command/Describe.hs b/Command/Describe.hs
index dc7a5d8f9..4b86a7b58 100644
--- a/Command/Describe.hs
+++ b/Command/Describe.hs
@@ -22,7 +22,7 @@ seek = withWords start
start :: [String] -> CommandStart
start (name:description) = do
- showStart "describe" name
+ showStart' "describe" (Just name)
u <- Remote.nameToUUID name
next $ perform u $ unwords description
start _ = giveup "Specify a repository and a description."
diff --git a/Command/Direct.hs b/Command/Direct.hs
index 817cedd53..20eeef726 100644
--- a/Command/Direct.hs
+++ b/Command/Direct.hs
@@ -31,7 +31,7 @@ start = ifM versionSupportsDirectMode
perform :: CommandPerform
perform = do
- showStart "commit" ""
+ showStart' "commit" Nothing
showOutput
_ <- inRepo $ Git.Branch.commitCommand Git.Branch.ManualCommit
[ Param "-a"
@@ -65,6 +65,6 @@ perform = do
cleanup :: CommandCleanup
cleanup = do
- showStart "direct" ""
+ showStart' "direct" Nothing
setDirect True
return True
diff --git a/Command/Drop.hs b/Command/Drop.hs
index b03e3e080..275714a65 100644
--- a/Command/Drop.hs
+++ b/Command/Drop.hs
@@ -89,12 +89,12 @@ startKeys o key = start' o key (AssociatedFile Nothing)
startLocal :: AssociatedFile -> ActionItem -> NumCopies -> Key -> [VerifiedCopy] -> CommandStart
startLocal afile ai numcopies key preverified = stopUnless (inAnnex key) $ do
- showStart' "drop" key ai
+ showStartKey "drop" key ai
next $ performLocal key afile numcopies preverified
startRemote :: AssociatedFile -> ActionItem -> NumCopies -> Key -> Remote -> CommandStart
startRemote afile ai numcopies key remote = do
- showStart' ("drop " ++ Remote.name remote) key ai
+ showStartKey ("drop " ++ Remote.name remote) key ai
next $ performRemote key afile numcopies remote
performLocal :: Key -> AssociatedFile -> NumCopies -> [VerifiedCopy] -> CommandPerform
diff --git a/Command/DropKey.hs b/Command/DropKey.hs
index 65446ba06..f3f2333dd 100644
--- a/Command/DropKey.hs
+++ b/Command/DropKey.hs
@@ -42,7 +42,7 @@ seek o = do
start :: Key -> CommandStart
start key = do
- showStart' "dropkey" key (mkActionItem key)
+ showStartKey "dropkey" key (mkActionItem key)
next $ perform key
perform :: Key -> CommandPerform
diff --git a/Command/DropUnused.hs b/Command/DropUnused.hs
index 840a8a472..c5a61d739 100644
--- a/Command/DropUnused.hs
+++ b/Command/DropUnused.hs
@@ -55,5 +55,5 @@ perform from numcopies key = case from of
performOther :: (Key -> Git.Repo -> FilePath) -> Key -> CommandPerform
performOther filespec key = do
f <- fromRepo $ filespec key
- liftIO $ nukeFile f
+ pruneTmpWorkDirBefore f (liftIO . nukeFile)
next $ return True
diff --git a/Command/EnableRemote.hs b/Command/EnableRemote.hs
index fd830375a..d9993ebc9 100644
--- a/Command/EnableRemote.hs
+++ b/Command/EnableRemote.hs
@@ -55,7 +55,7 @@ start (name:rest) = go =<< filter matchingname <$> Annex.fromRepo Git.remotes
startNormalRemote :: Git.RemoteName -> [String] -> Git.Repo -> CommandStart
startNormalRemote name restparams r
| null restparams = do
- showStart "enableremote" name
+ showStart' "enableremote" (Just name)
next $ next $ do
setRemoteIgnore r False
r' <- Remote.Git.configRead False r
@@ -77,7 +77,7 @@ startSpecialRemote name config Nothing = do
startSpecialRemote name config (Just (u, c)) = do
let fullconfig = config `M.union` c
t <- either giveup return (Annex.SpecialRemote.findType fullconfig)
- showStart "enableremote" name
+ showStart' "enableremote" (Just name)
gc <- maybe (liftIO dummyRemoteGitConfig)
(return . Remote.gitconfig)
=<< Remote.byUUID u
diff --git a/Command/EnableTor.hs b/Command/EnableTor.hs
index 7076a9a73..72fa50448 100644
--- a/Command/EnableTor.hs
+++ b/Command/EnableTor.hs
@@ -51,7 +51,7 @@ start os = do
Nothing -> giveup "Need user-id parameter."
Just userid -> go uuid userid
else do
- showStart "enable-tor" ""
+ showStart' "enable-tor" Nothing
gitannex <- liftIO readProgramFile
let ps = [Param (cmdname cmd), Param (show curruserid)]
sucommand <- liftIO $ mkSuCommand gitannex ps
diff --git a/Command/Expire.hs b/Command/Expire.hs
index 551742304..28f90dfb5 100644
--- a/Command/Expire.hs
+++ b/Command/Expire.hs
@@ -59,12 +59,12 @@ start :: Expire -> Bool -> Log Activity -> M.Map UUID String -> UUID -> CommandS
start (Expire expire) noact actlog descs u =
case lastact of
Just ent | notexpired ent -> checktrust (== DeadTrusted) $ do
- showStart "unexpire" desc
+ showStart' "unexpire" (Just desc)
showNote =<< whenactive
unless noact $
trustSet u SemiTrusted
_ -> checktrust (/= DeadTrusted) $ do
- showStart "expire" desc
+ showStart' "expire" (Just desc)
showNote =<< whenactive
unless noact $
trustSet u DeadTrusted
diff --git a/Command/Forget.hs b/Command/Forget.hs
index d172cc693..40ac98d4b 100644
--- a/Command/Forget.hs
+++ b/Command/Forget.hs
@@ -34,7 +34,7 @@ seek = commandAction . start
start :: ForgetOptions -> CommandStart
start o = do
- showStart "forget" "git-annex"
+ showStart' "forget" (Just "git-annex")
c <- liftIO currentVectorClock
let basets = addTransition c ForgetGitHistory noTransitions
let ts = if dropDead o
diff --git a/Command/FromKey.hs b/Command/FromKey.hs
index c1e3a7965..1b276db99 100644
--- a/Command/FromKey.hs
+++ b/Command/FromKey.hs
@@ -41,7 +41,7 @@ start force (keyname, file) = do
startMass :: CommandStart
startMass = do
- showStart "fromkey" "stdin"
+ showStart' "fromkey" (Just "stdin")
next massAdd
massAdd :: CommandPerform
diff --git a/Command/Fsck.hs b/Command/Fsck.hs
index 514421e93..bc7a29f15 100644
--- a/Command/Fsck.hs
+++ b/Command/Fsck.hs
@@ -536,7 +536,7 @@ badContentRemote remote localcopy key = do
runFsck :: Incremental -> ActionItem -> Key -> Annex Bool -> CommandStart
runFsck inc ai key a = ifM (needFsck inc key)
( do
- showStart' "fsck" key ai
+ showStartKey "fsck" key ai
next $ do
ok <- a
when ok $
diff --git a/Command/Get.hs b/Command/Get.hs
index a412b2cb3..a74ca253f 100644
--- a/Command/Get.hs
+++ b/Command/Get.hs
@@ -71,7 +71,7 @@ start' expensivecheck from key afile ai = onlyActionOn key $
go $ Command.Move.fromPerform src False key afile
where
go a = do
- showStart' "get" key ai
+ showStartKey "get" key ai
next a
perform :: Key -> AssociatedFile -> CommandPerform
diff --git a/Command/Group.hs b/Command/Group.hs
index 65e062589..52b7e688c 100644
--- a/Command/Group.hs
+++ b/Command/Group.hs
@@ -24,7 +24,7 @@ seek = withWords start
start :: [String] -> CommandStart
start (name:g:[]) = do
allowMessages
- showStart "group" name
+ showStart' "group" (Just name)
u <- Remote.nameToUUID name
next $ setGroup u g
start (name:[]) = do
diff --git a/Command/GroupWanted.hs b/Command/GroupWanted.hs
index 939b3030a..8dab02933 100644
--- a/Command/GroupWanted.hs
+++ b/Command/GroupWanted.hs
@@ -24,6 +24,6 @@ start :: [String] -> CommandStart
start (g:[]) = next $ performGet groupPreferredContentMapRaw g
start (g:expr:[]) = do
allowMessages
- showStart "groupwanted" g
+ showStart' "groupwanted" (Just g)
next $ performSet groupPreferredContentSet expr g
start _ = giveup "Specify a group."
diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs
index d2989f05b..a02d11824 100644
--- a/Command/ImportFeed.hs
+++ b/Command/ImportFeed.hs
@@ -1,6 +1,6 @@
{- git-annex command
-
- - Copyright 2013 Joey Hess <id@joeyh.name>
+ - Copyright 2013-2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -32,16 +32,16 @@ import Types.UrlContents
import Logs.Web
import qualified Utility.Format
import Utility.Tmp
-import Command.AddUrl (addUrlFile, downloadRemoteFile, parseRelaxedOption, parseRawOption)
+import Command.AddUrl (addUrlFile, downloadRemoteFile, parseDownloadOptions, DownloadOptions(..))
import Annex.Perms
import Annex.UUID
import Backend.URL (fromUrl)
-import Annex.Quvi
-import qualified Utility.Quvi as Quvi
-import Command.AddUrl (addUrlFileQuvi)
+import Annex.Content
+import Annex.YoutubeDl
import Types.MetaData
import Logs.MetaData
import Annex.MetaData
+import Command.AddUrl (addWorkTree)
cmd :: Command
cmd = notBareRepo $
@@ -51,8 +51,7 @@ cmd = notBareRepo $
data ImportFeedOptions = ImportFeedOptions
{ feedUrls :: CmdParams
, templateOption :: Maybe String
- , relaxedOption :: Bool
- , rawOption :: Bool
+ , downloadOptions :: DownloadOptions
}
optParser :: CmdParamsDesc -> Parser ImportFeedOptions
@@ -62,8 +61,7 @@ optParser desc = ImportFeedOptions
( long "template" <> metavar paramFormat
<> help "template for filenames"
))
- <*> parseRelaxedOption
- <*> parseRawOption
+ <*> parseDownloadOptions False
seek :: ImportFeedOptions -> CommandSeek
seek o = do
@@ -72,7 +70,7 @@ seek o = do
start :: ImportFeedOptions -> Cache -> URLString -> CommandStart
start opts cache url = do
- showStart "importfeed" url
+ showStart' "importfeed" (Just url)
next $ perform opts cache url
perform :: ImportFeedOptions -> Cache -> URLString -> CommandPerform
@@ -101,7 +99,7 @@ data ToDownload = ToDownload
, location :: DownloadLocation
}
-data DownloadLocation = Enclosure URLString | QuviLink URLString
+data DownloadLocation = Enclosure URLString | MediaLink URLString
type ItemId = String
@@ -141,14 +139,10 @@ findDownloads u = go =<< downloadFeed u
Just (enclosureurl, _, _) -> return $
Just $ ToDownload f u i $ Enclosure $
fromFeed enclosureurl
- Nothing -> mkquvi f i
- mkquvi f i = case getItemLink i of
- Just link -> ifM (quviSupported $ fromFeed link)
- ( return $ Just $ ToDownload f u i $ QuviLink $
- fromFeed link
- , return Nothing
- )
- Nothing -> return Nothing
+ Nothing -> case getItemLink i of
+ Just link -> return $ Just $ ToDownload f u i $
+ MediaLink $ fromFeed link
+ Nothing -> return Nothing
{- Feeds change, so a feed download cannot be resumed. -}
downloadFeed :: URLString -> Annex (Maybe Feed)
@@ -169,12 +163,19 @@ performDownload opts cache todownload = case location todownload of
Enclosure url -> checkknown url $
rundownload url (takeWhile (/= '?') $ takeExtension url) $ \f -> do
r <- Remote.claimingUrl url
- if Remote.uuid r == webUUID || rawOption opts
+ if Remote.uuid r == webUUID || rawOption (downloadOptions opts)
then do
- urlinfo <- if relaxedOption opts
+ urlinfo <- if relaxedOption (downloadOptions opts)
then pure Url.assumeUrlExists
else Url.withUrlOptions (Url.getUrlInfo url)
- maybeToList <$> addUrlFile (relaxedOption opts) url urlinfo f
+ let dlopts = (downloadOptions opts)
+ -- force using the filename
+ -- chosen here
+ { fileOption = Just f
+ -- don't use youtube-dl
+ , rawOption = True
+ }
+ maybeToList <$> addUrlFile dlopts url urlinfo f
else do
res <- tryNonAsync $ maybe
(error $ "unable to checkUrl of " ++ Remote.name r)
@@ -184,27 +185,26 @@ performDownload opts cache todownload = case location todownload of
Left _ -> return []
Right (UrlContents sz _) ->
maybeToList <$>
- downloadRemoteFile r (relaxedOption opts) url f sz
+ downloadRemoteFile r (downloadOptions opts) url f sz
Right (UrlMulti l) -> do
kl <- forM l $ \(url', sz, subf) ->
- downloadRemoteFile r (relaxedOption opts) url' (f </> fromSafeFilePath subf) sz
+ downloadRemoteFile r (downloadOptions opts) url' (f </> fromSafeFilePath subf) sz
return $ if all isJust kl
then catMaybes kl
else []
- QuviLink pageurl -> do
- let quviurl = setDownloader pageurl QuviDownloader
- checkknown quviurl $ do
- mp <- withQuviOptions Quvi.query [Quvi.quiet, Quvi.httponly] pageurl
- case mp of
- Nothing -> return False
- Just page -> case headMaybe $ Quvi.pageLinks page of
- Nothing -> return False
- Just link -> do
- let videourl = Quvi.linkUrl link
- checkknown videourl $
- rundownload videourl ("." ++ fromMaybe "m" (Quvi.linkSuffix link)) $ \f ->
- maybeToList <$> addUrlFileQuvi (relaxedOption opts) quviurl videourl f
+ MediaLink linkurl -> do
+ let mediaurl = setDownloader linkurl YoutubeDownloader
+ let mediakey = Backend.URL.fromUrl mediaurl Nothing
+ -- Old versions of git-annex that used quvi might have
+ -- used the quviurl for this, so check if it's known
+ -- to avoid adding it a second time.
+ let quviurl = setDownloader linkurl QuviDownloader
+ checkknown mediaurl $ checkknown quviurl $
+ ifM (Annex.getState Annex.fast <||> pure (relaxedOption (downloadOptions opts)))
+ ( addmediafast linkurl mediaurl mediakey
+ , downloadmedia linkurl mediaurl mediakey
+ )
where
forced = Annex.getState Annex.force
@@ -265,6 +265,42 @@ performDownload opts cache todownload = case location todownload of
( return Nothing
, tryanother
)
+
+ downloadmedia linkurl mediaurl mediakey
+ | rawOption (downloadOptions opts) = downloadlink
+ | otherwise = do
+ r <- withTmpWorkDir mediakey $ \workdir -> do
+ dl <- youtubeDl linkurl workdir
+ case dl of
+ Right (Just mediafile) -> do
+ let ext = case takeExtension mediafile of
+ [] -> ".m"
+ s -> s
+ ok <- rundownload linkurl ext $ \f -> do
+ addWorkTree webUUID mediaurl f mediakey (Just mediafile)
+ return [mediakey]
+ return (Just ok)
+ -- youtube-dl didn't support it, so
+ -- download it as if the link were
+ -- an enclosure.
+ Right Nothing -> Just <$> downloadlink
+ Left msg -> do
+ warning msg
+ return Nothing
+ return (fromMaybe False r)
+ where
+ downloadlink = performDownload opts cache todownload
+ { location = Enclosure linkurl }
+
+ addmediafast linkurl mediaurl mediakey =
+ ifM (pure (not (rawOption (downloadOptions opts)))
+ <&&> youtubeDlSupported linkurl)
+ ( rundownload linkurl ".m" $ \f -> do
+ addWorkTree webUUID mediaurl f mediakey Nothing
+ return [mediakey]
+ , performDownload opts cache todownload
+ { location = Enclosure linkurl }
+ )
defaultTemplate :: String
defaultTemplate = "${feedtitle}/${itemtitle}${extension}"
diff --git a/Command/Indirect.hs b/Command/Indirect.hs
index 862c6e00e..825b82004 100644
--- a/Command/Indirect.hs
+++ b/Command/Indirect.hs
@@ -42,7 +42,7 @@ start = ifM isDirect
perform :: CommandPerform
perform = do
- showStart "commit" ""
+ showStart' "commit" Nothing
whenM stageDirect $ do
showOutput
void $ inRepo $ Git.Branch.commitCommand Git.Branch.ManualCommit
@@ -100,6 +100,6 @@ perform = do
cleanup :: CommandCleanup
cleanup = do
- showStart "indirect" ""
+ showStart' "indirect" Nothing
showEndOk
return True
diff --git a/Command/Init.hs b/Command/Init.hs
index 3c38c0f8a..e7f57c29c 100644
--- a/Command/Init.hs
+++ b/Command/Init.hs
@@ -40,7 +40,7 @@ seek = commandAction . start
start :: InitOptions -> CommandStart
start os = do
- showStart "init" (initDesc os)
+ showStart' "init" (Just $ initDesc os)
next $ perform os
perform :: InitOptions -> CommandPerform
diff --git a/Command/InitRemote.hs b/Command/InitRemote.hs
index d82dc366c..c52ca4c56 100644
--- a/Command/InitRemote.hs
+++ b/Command/InitRemote.hs
@@ -38,7 +38,7 @@ start (name:ws) = ifM (isJust <$> findExisting name)
let c = newConfig name
t <- either giveup return (findType config)
- showStart "initremote" name
+ showStart' "initremote" (Just name)
next $ perform t name $ M.union config c
)
)
diff --git a/Command/Map.hs b/Command/Map.hs
index ae568f8cc..9ae73d898 100644
--- a/Command/Map.hs
+++ b/Command/Map.hs
@@ -189,7 +189,7 @@ same a b
{- reads the config of a remote, with progress display -}
scan :: Git.Repo -> Annex Git.Repo
scan r = do
- showStart "map" $ Git.repoDescribe r
+ showStart' "map" (Just $ Git.repoDescribe r)
v <- tryScan r
case v of
Just r' -> do
diff --git a/Command/Merge.hs b/Command/Merge.hs
index 4f99093ab..66b519973 100644
--- a/Command/Merge.hs
+++ b/Command/Merge.hs
@@ -23,7 +23,7 @@ seek _ = do
mergeBranch :: CommandStart
mergeBranch = do
- showStart "merge" "git-annex"
+ showStart' "merge" (Just "git-annex")
next $ do
Annex.Branch.update
-- commit explicitly, in case no remote branches were merged
diff --git a/Command/MetaData.hs b/Command/MetaData.hs
index fd5fd0838..9fba1097a 100644
--- a/Command/MetaData.hs
+++ b/Command/MetaData.hs
@@ -100,7 +100,7 @@ startKeys c o k ai = case getSet o of
putStrLn . fromMetaValue
stop
_ -> do
- showStart' "metadata" k ai
+ showStartKey "metadata" k ai
next $ perform c o k
perform :: VectorClock -> MetaDataOptions -> Key -> CommandPerform
@@ -164,7 +164,7 @@ startBatch (i, (MetaData m)) = case i of
Right k -> go k (mkActionItem k)
where
go k ai = do
- showStart' "metadata" k ai
+ showStartKey "metadata" k ai
let o = MetaDataOptions
{ forFiles = []
, getSet = if MetaData m == emptyMetaData
diff --git a/Command/Move.hs b/Command/Move.hs
index 04e6aa384..63b5fb8b0 100644
--- a/Command/Move.hs
+++ b/Command/Move.hs
@@ -87,7 +87,7 @@ start' o move afile key ai = onlyActionOn key $
toHereStart move afile key ai
showMoveAction :: Bool -> Key -> ActionItem -> Annex ()
-showMoveAction move = showStart' (if move then "move" else "copy")
+showMoveAction move = showStartKey (if move then "move" else "copy")
{- Moves (or copies) the content of an annexed file to a remote.
-
diff --git a/Command/Multicast.hs b/Command/Multicast.hs
index 7b0d54e8e..9a518a18f 100644
--- a/Command/Multicast.hs
+++ b/Command/Multicast.hs
@@ -78,7 +78,7 @@ seek (MultiCastOptions Receive _ _) = giveup "Cannot specify list of files with
genAddress :: CommandStart
genAddress = do
- showStart "gen-address" ""
+ showStart' "gen-address" Nothing
k <- uftpKey
(s, ok) <- case k of
KeyContainer s -> liftIO $ genkey (Param s)
@@ -130,7 +130,7 @@ send ups fs = withTmpFile "send" $ \t h -> do
whenM isDirect $
giveup "Sorry, multicast send cannot be done from a direct mode repository."
- showStart "generating file list" ""
+ showStart' "generating file list" Nothing
fs' <- seekHelper LsFiles.inRepo =<< workTreeItems fs
matcher <- Limit.getMatcher
let addlist f o = whenM (matcher $ MatchingFile $ FileInfo f f) $
@@ -143,7 +143,7 @@ send ups fs = withTmpFile "send" $ \t h -> do
liftIO $ hClose h
showEndOk
- showStart "sending files" ""
+ showStart' "sending files" Nothing
showOutput
serverkey <- uftpKey
u <- getUUID
@@ -169,7 +169,7 @@ send ups fs = withTmpFile "send" $ \t h -> do
receive :: [CommandParam] -> CommandStart
receive ups = do
- showStart "receiving multicast files" ""
+ showStart' "receiving multicast files" Nothing
showNote "Will continue to run until stopped by ctrl-c"
showOutput
diff --git a/Command/NumCopies.hs b/Command/NumCopies.hs
index 9e467da7a..d10f70063 100644
--- a/Command/NumCopies.hs
+++ b/Command/NumCopies.hs
@@ -48,7 +48,7 @@ startGet = next $ next $ do
startSet :: Int -> CommandStart
startSet n = do
allowMessages
- showStart "numcopies" (show n)
+ showStart' "numcopies" (Just $ show n)
next $ next $ do
setGlobalNumCopies $ NumCopies n
return True
diff --git a/Command/P2P.hs b/Command/P2P.hs
index 58b5c3bd7..40a49b49f 100644
--- a/Command/P2P.hs
+++ b/Command/P2P.hs
@@ -97,7 +97,7 @@ genAddresses addrs = do
-- Address is read from stdin, to avoid leaking it in shell history.
linkRemote :: RemoteName -> CommandStart
linkRemote remotename = do
- showStart "p2p link" remotename
+ showStart' "p2p link" (Just remotename)
next $ next promptaddr
where
promptaddr = do
@@ -123,7 +123,7 @@ linkRemote remotename = do
startPairing :: RemoteName -> [P2PAddress] -> CommandStart
startPairing _ [] = giveup "No P2P networks are currrently available."
startPairing remotename addrs = do
- showStart "p2p pair" remotename
+ showStart' "p2p pair" (Just remotename)
ifM (liftIO Wormhole.isInstalled)
( next $ performPairing remotename addrs
, giveup "Magic Wormhole is not installed, and is needed for pairing. Install it from your distribution or from https://github.com/warner/magic-wormhole/"
diff --git a/Command/RegisterUrl.hs b/Command/RegisterUrl.hs
index 008e6436c..ef73e6728 100644
--- a/Command/RegisterUrl.hs
+++ b/Command/RegisterUrl.hs
@@ -27,10 +27,10 @@ seek = withWords start
start :: [String] -> CommandStart
start (keyname:url:[]) = do
let key = mkKey keyname
- showStart "registerurl" url
+ showStart' "registerurl" (Just url)
next $ perform key url
start [] = do
- showStart "registerurl" "stdin"
+ showStart' "registerurl" (Just "stdin")
next massAdd
start _ = giveup "specify a key and an url"
diff --git a/Command/Reinit.hs b/Command/Reinit.hs
index 25001db43..6defa4e95 100644
--- a/Command/Reinit.hs
+++ b/Command/Reinit.hs
@@ -25,7 +25,7 @@ seek = withWords start
start :: [String] -> CommandStart
start ws = do
- showStart "reinit" s
+ showStart' "reinit" (Just s)
next $ perform s
where
s = unwords ws
diff --git a/Command/ResolveMerge.hs b/Command/ResolveMerge.hs
index 0ba6efb36..df72e24d4 100644
--- a/Command/ResolveMerge.hs
+++ b/Command/ResolveMerge.hs
@@ -23,7 +23,7 @@ seek = withNothing start
start :: CommandStart
start = do
- showStart "resolvemerge" ""
+ showStart' "resolvemerge" Nothing
us <- fromMaybe nobranch <$> inRepo Git.Branch.current
d <- fromRepo Git.localGitDir
let merge_head = d </> "MERGE_HEAD"
diff --git a/Command/Schedule.hs b/Command/Schedule.hs
index 5814d99f1..b39e652b2 100644
--- a/Command/Schedule.hs
+++ b/Command/Schedule.hs
@@ -28,7 +28,7 @@ start = parse
parse (name:[]) = go name performGet
parse (name:expr:[]) = go name $ \uuid -> do
allowMessages
- showStart "schedule" name
+ showStart' "schedule" (Just name)
performSet expr uuid
parse _ = giveup "Specify a repository."
diff --git a/Command/SetPresentKey.hs b/Command/SetPresentKey.hs
index da2a6fa3d..6e7075e8c 100644
--- a/Command/SetPresentKey.hs
+++ b/Command/SetPresentKey.hs
@@ -23,7 +23,7 @@ seek = withWords start
start :: [String] -> CommandStart
start (ks:us:vs:[]) = do
- showStart' "setpresentkey" k (mkActionItem k)
+ showStartKey "setpresentkey" k (mkActionItem k)
next $ perform k (toUUID us) s
where
k = fromMaybe (giveup "bad key") (file2key ks)
diff --git a/Command/Sync.hs b/Command/Sync.hs
index b2d0bd275..f63260ed4 100644
--- a/Command/Sync.hs
+++ b/Command/Sync.hs
@@ -299,7 +299,7 @@ syncRemotes' ps available =
commit :: SyncOptions -> CommandStart
commit o = stopUnless shouldcommit $ next $ next $ do
commitmessage <- maybe commitMsg return (messageOption o)
- showStart "commit" ""
+ showStart' "commit" Nothing
Annex.Branch.commit "update"
ifM isDirect
( do
@@ -342,7 +342,7 @@ mergeLocal mergeconfig resolvemergeoverride currbranch@(Just _, _) =
where
go Nothing = stop
go (Just syncbranch) = do
- showStart "merge" $ Git.Ref.describe syncbranch
+ showStart' "merge" (Just $ Git.Ref.describe syncbranch)
next $ next $ merge currbranch mergeconfig resolvemergeoverride Git.Branch.ManualCommit syncbranch
mergeLocal _ _ (Nothing, madj) = do
b <- inRepo Git.Branch.currentUnsafe
@@ -401,7 +401,7 @@ updateBranch syncbranch updateto g =
pullRemote :: SyncOptions -> [Git.Merge.MergeConfig] -> Remote -> CurrBranch -> CommandStart
pullRemote o mergeconfig remote branch = stopUnless (pure $ pullOption o && wantpull) $ do
- showStart "pull" (Remote.name remote)
+ showStart' "pull" (Just (Remote.name remote))
next $ do
showOutput
stopUnless fetch $
@@ -438,7 +438,7 @@ mergeRemote remote currbranch mergeconfig resolvemergeoverride = ifM isBareRepo
pushRemote :: SyncOptions -> Remote -> CurrBranch -> CommandStart
pushRemote _o _remote (Nothing, _) = stop
pushRemote o remote (Just branch, _) = stopUnless (pure (pushOption o) <&&> needpush) $ do
- showStart "push" (Remote.name remote)
+ showStart' "push" (Just (Remote.name remote))
next $ next $ do
showOutput
ok <- inRepoWithSshOptionsTo (Remote.repo remote) gc $
@@ -651,7 +651,7 @@ syncFile ebloom rs af k = onlyActionOn' k $ do
, return []
)
get have = includeCommandAction $ do
- showStart' "get" k (mkActionItem af)
+ showStartKey "get" k (mkActionItem af)
next $ next $ getKey' k af have
wantput r
@@ -703,7 +703,7 @@ seekExportContent rs = or <$> forM rs go
cleanupLocal :: CurrBranch -> CommandStart
cleanupLocal (Nothing, _) = stop
cleanupLocal (Just currb, _) = do
- showStart "cleanup" "local"
+ showStart' "cleanup" (Just "local")
next $ next $ do
delbranch $ syncBranch currb
delbranch $ syncBranch $ Git.Ref.base $ Annex.Branch.name
@@ -717,7 +717,7 @@ cleanupLocal (Just currb, _) = do
cleanupRemote :: Remote -> CurrBranch -> CommandStart
cleanupRemote _ (Nothing, _) = stop
cleanupRemote remote (Just b, _) = do
- showStart "cleanup" (Remote.name remote)
+ showStart' "cleanup" (Just (Remote.name remote))
next $ next $
inRepo $ Git.Command.runBool
[ Param "push"
diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs
index 75e438d79..1a5da42b2 100644
--- a/Command/TestRemote.hs
+++ b/Command/TestRemote.hs
@@ -58,7 +58,7 @@ seek o = commandAction $ start (fromInteger $ sizeOption o) (testRemote o)
start :: Int -> RemoteName -> CommandStart
start basesz name = do
- showStart "testremote" name
+ showStart' "testremote" (Just name)
fast <- Annex.getState Annex.fast
r <- either giveup disableExportTree =<< Remote.byName' name
rs <- catMaybes <$> mapM (adjustChunkSize r) (chunkSizes basesz fast)
diff --git a/Command/Trust.hs b/Command/Trust.hs
index 66a1c81b3..5f161af26 100644
--- a/Command/Trust.hs
+++ b/Command/Trust.hs
@@ -27,7 +27,7 @@ trustCommand c level = withWords start
where
start ws = do
let name = unwords ws
- showStart c name
+ showStart' c (Just name)
u <- Remote.nameToUUID name
next $ perform u
perform uuid = do
diff --git a/Command/Ungroup.hs b/Command/Ungroup.hs
index ddcdba466..357f5685f 100644
--- a/Command/Ungroup.hs
+++ b/Command/Ungroup.hs
@@ -23,7 +23,7 @@ seek = withWords start
start :: [String] -> CommandStart
start (name:g:[]) = do
- showStart "ungroup" name
+ showStart' "ungroup" (Just name)
u <- Remote.nameToUUID name
next $ perform u g
start _ = giveup "Specify a repository and a group."
diff --git a/Command/Unused.hs b/Command/Unused.hs
index 916e6db25..27018cf38 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -70,7 +70,7 @@ start o = do
Just "." -> (".", checkUnused refspec)
Just "here" -> (".", checkUnused refspec)
Just n -> (n, checkRemoteUnused n refspec)
- showStart "unused" name
+ showStart' "unused" (Just name)
next perform
checkUnused :: RefSpec -> CommandPerform
@@ -338,5 +338,5 @@ startUnused message unused badunused tmpunused maps n = search
case M.lookup n m of
Nothing -> search rest
Just key -> do
- showStart message (show n)
+ showStart' message (Just $ show n)
next $ a key
diff --git a/Command/Upgrade.hs b/Command/Upgrade.hs
index 696b794a5..1c3e62695 100644
--- a/Command/Upgrade.hs
+++ b/Command/Upgrade.hs
@@ -23,7 +23,7 @@ seek = withNothing start
start :: CommandStart
start = do
- showStart "upgrade" "."
+ showStart' "upgrade" Nothing
whenM (isNothing <$> getVersion) $ do
initialize Nothing Nothing
r <- upgrade False latestVersion
diff --git a/Command/VAdd.hs b/Command/VAdd.hs
index c94ce5722..800573732 100644
--- a/Command/VAdd.hs
+++ b/Command/VAdd.hs
@@ -23,7 +23,7 @@ seek = withWords start
start :: [String] -> CommandStart
start params = do
- showStart "vadd" ""
+ showStart' "vadd" Nothing
withCurrentView $ \view -> do
let (view', change) = refineView view $
map parseViewParam $ reverse params
diff --git a/Command/VCycle.hs b/Command/VCycle.hs
index 28326e16f..9c1e24e0d 100644
--- a/Command/VCycle.hs
+++ b/Command/VCycle.hs
@@ -27,7 +27,7 @@ start = go =<< currentView
where
go Nothing = giveup "Not in a view."
go (Just v) = do
- showStart "vcycle" ""
+ showStart' "vcycle" Nothing
let v' = v { viewComponents = vcycle [] (viewComponents v) }
if v == v'
then do
diff --git a/Command/VFilter.hs b/Command/VFilter.hs
index 130e2550c..fa545cddd 100644
--- a/Command/VFilter.hs
+++ b/Command/VFilter.hs
@@ -21,7 +21,7 @@ seek = withWords start
start :: [String] -> CommandStart
start params = do
- showStart "vfilter" ""
+ showStart' "vfilter" Nothing
withCurrentView $ \view -> do
let view' = filterView view $
map parseViewParam $ reverse params
diff --git a/Command/VPop.hs b/Command/VPop.hs
index 58411001b..71da586c2 100644
--- a/Command/VPop.hs
+++ b/Command/VPop.hs
@@ -28,7 +28,7 @@ start ps = go =<< currentView
where
go Nothing = giveup "Not in a view."
go (Just v) = do
- showStart "vpop" (show num)
+ showStart' "vpop" (Just $ show num)
removeView v
(oldvs, vs) <- splitAt (num - 1) . filter (sameparentbranch v)
<$> recentViews
diff --git a/Command/View.hs b/Command/View.hs
index 513e6d10c..298fad0ee 100644
--- a/Command/View.hs
+++ b/Command/View.hs
@@ -27,7 +27,7 @@ seek = withWords start
start :: [String] -> CommandStart
start [] = giveup "Specify metadata to include in view"
start ps = do
- showStart "view" ""
+ showStart' "view" Nothing
view <- mkView ps
go view =<< currentView
where
diff --git a/Command/Wanted.hs b/Command/Wanted.hs
index fc1fa86bd..ac4175ecd 100644
--- a/Command/Wanted.hs
+++ b/Command/Wanted.hs
@@ -35,7 +35,7 @@ cmd' name desc getter setter = noMessages $
start (rname:[]) = go rname (performGet getter)
start (rname:expr:[]) = go rname $ \uuid -> do
allowMessages
- showStart name rname
+ showStart' name (Just rname)
performSet setter expr uuid
start _ = giveup "Specify a repository."
diff --git a/Command/Whereis.hs b/Command/Whereis.hs
index c5543bc66..295d11994 100644
--- a/Command/Whereis.hs
+++ b/Command/Whereis.hs
@@ -53,7 +53,7 @@ start remotemap file key = startKeys remotemap key (mkActionItem afile)
startKeys :: M.Map UUID Remote -> Key -> ActionItem -> CommandStart
startKeys remotemap key ai = do
- showStart' "whereis" key ai
+ showStartKey "whereis" key ai
next $ perform remotemap key
perform :: M.Map UUID Remote -> Key -> CommandPerform
diff --git a/Logs/Web.hs b/Logs/Web.hs
index ba71cb17d..abea00db6 100644
--- a/Logs/Web.hs
+++ b/Logs/Web.hs
@@ -100,15 +100,15 @@ removeTempUrl :: Key -> Annex ()
removeTempUrl key = Annex.changeState $ \s ->
s { Annex.tempurls = M.delete key (Annex.tempurls s) }
-data Downloader = WebDownloader | QuviDownloader | OtherDownloader
+data Downloader = WebDownloader | YoutubeDownloader | QuviDownloader | OtherDownloader
deriving (Eq, Show)
{- To keep track of how an url is downloaded, it's mangled slightly in
- - the log. For quvi, "quvi:" is prefixed. For urls that are handled by
- - some other remote, ":" is prefixed. -}
+ - the log, with a prefix indicating when a Downloader is used. -}
setDownloader :: URLString -> Downloader -> String
setDownloader u WebDownloader = u
setDownloader u QuviDownloader = "quvi:" ++ u
+setDownloader u YoutubeDownloader = "yt:" ++ u
setDownloader u OtherDownloader = ":" ++ u
setDownloader' :: URLString -> Remote -> String
@@ -118,6 +118,9 @@ setDownloader' u r
getDownloader :: URLString -> (URLString, Downloader)
getDownloader u = case separate (== ':') u of
- ("quvi", u') -> (u', QuviDownloader)
+ ("yt", u') -> (u', YoutubeDownloader)
+ -- quvi is not used any longer; youtube-dl should be able to handle
+ -- all urls it did.
+ ("quvi", u') -> (u', YoutubeDownloader)
("", u') -> (u', OtherDownloader)
_ -> (u, WebDownloader)
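
For reference, here is a small, self-contained sketch of the prefix scheme the hunk above introduces (the real functions live in Logs/Web.hs, use git-annex's `separate` helper rather than `break`, and urls claimed by other special remotes go through setDownloader'). The property worth noting is that old "quvi:" prefixes are read back as YoutubeDownloader, so urls recorded by older versions of git-annex keep working:

```haskell
type URLString = String

data Downloader = WebDownloader | YoutubeDownloader | QuviDownloader | OtherDownloader
    deriving (Eq, Show)

setDownloader :: URLString -> Downloader -> String
setDownloader u WebDownloader = u
setDownloader u QuviDownloader = "quvi:" ++ u
setDownloader u YoutubeDownloader = "yt:" ++ u
setDownloader u OtherDownloader = ":" ++ u

getDownloader :: URLString -> (URLString, Downloader)
getDownloader u = case break (== ':') u of
    ("yt", ':':u') -> (u', YoutubeDownloader)
    -- quvi is no longer used; its old prefix maps to youtube-dl
    ("quvi", ':':u') -> (u', YoutubeDownloader)
    ("", ':':u') -> (u', OtherDownloader)
    _ -> (u, WebDownloader)

main :: IO ()
main = do
    print (getDownloader (setDownloader "https://example.com/v" YoutubeDownloader))
    -- ("https://example.com/v",YoutubeDownloader)
    print (getDownloader "quvi:https://example.com/v")
    -- ("https://example.com/v",YoutubeDownloader)
```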
diff --git a/Messages.hs b/Messages.hs
index ff13b31ee..08a7bb719 100644
--- a/Messages.hs
+++ b/Messages.hs
@@ -7,9 +7,10 @@
module Messages (
showStart,
+ showStart',
+ showStartKey,
ActionItem,
mkActionItem,
- showStart',
showNote,
showAction,
showSideAction,
@@ -66,8 +67,14 @@ showStart command file = outputMessage json $
where
json = JSON.start command (Just file) Nothing
-showStart' :: String -> Key -> ActionItem -> Annex ()
-showStart' command key i = outputMessage json $
+showStart' :: String -> Maybe String -> Annex ()
+showStart' command mdesc = outputMessage json $
+ command ++ (maybe "" (" " ++) mdesc) ++ " "
+ where
+ json = JSON.start command Nothing Nothing
+
+showStartKey :: String -> Key -> ActionItem -> Annex ()
+showStartKey command key i = outputMessage json $
command ++ " " ++ actionItemDesc i key ++ " "
where
json = JSON.start command (actionItemWorkTreeFile i) (Just key)
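
For reference, a rough sketch (not from the patch) of just the plain-text start message the reworked showStart' builds, mirroring the `command ++ maybe "" (" " ++) mdesc ++ " "` expression in the hunk above; the real function also emits a JSON start message via outputMessage:

```haskell
-- Sketch of the start-message text only; the description is now optional,
-- so commands like "commit" no longer pass an empty-string placeholder.
render :: String -> Maybe String -> String
render command mdesc = command ++ maybe "" (" " ++) mdesc ++ " "

main :: IO ()
main = do
    putStrLn (render "commit" Nothing)            -- "commit "
    putStrLn (render "merge" (Just "git-annex"))  -- "merge git-annex "
```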
diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs
index f7e9759a4..83e08c5aa 100644
--- a/Remote/Helper/Special.hs
+++ b/Remote/Helper/Special.hs
@@ -107,7 +107,7 @@ fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
fileRetriever a k m callback = do
f <- prepTmp k
a f k m
- callback (FileContent f)
+ pruneTmpWorkDirBefore f (callback . FileContent)
-- A Retriever that generates a lazy ByteString containing the Key's
-- content, and passes it to a callback action which will fully consume it
diff --git a/Remote/Web.hs b/Remote/Web.hs
index 233c17eb3..5aa1edd3e 100644
--- a/Remote/Web.hs
+++ b/Remote/Web.hs
@@ -19,8 +19,7 @@ import Logs.Web
import Annex.UUID
import Utility.Metered
import qualified Annex.Url as Url
-import Annex.Quvi
-import qualified Utility.Quvi as Quvi
+import Annex.YoutubeDl
remote :: RemoteType
remote = RemoteType
@@ -80,9 +79,7 @@ downloadKey key _af dest p = unVerified $ get =<< getWebUrls key
untilTrue urls $ \u -> do
let (u', downloader) = getDownloader u
case downloader of
- QuviDownloader -> do
- flip (downloadUrl key p) dest
- =<< withQuviOptions Quvi.queryLinks [Quvi.httponly, Quvi.quiet] u'
+ YoutubeDownloader -> youtubeDlTo key u' dest
_ -> downloadUrl key p [u'] dest
downloadKeyCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool
@@ -109,8 +106,7 @@ checkKey' key us = firsthit us (Right False) $ \u -> do
let (u', downloader) = getDownloader u
showChecking u'
case downloader of
- QuviDownloader ->
- Right <$> withQuviOptions Quvi.check [Quvi.httponly, Quvi.quiet] u'
+ YoutubeDownloader -> youtubeDlCheck u'
_ -> do
Url.withUrlOptions $ catchMsgIO .
Url.checkBoth u' (keySize key)
@@ -126,4 +122,4 @@ getWebUrls :: Key -> Annex [URLString]
getWebUrls key = filter supported <$> getUrls key
where
supported u = snd (getDownloader u)
- `elem` [WebDownloader, QuviDownloader]
+ `elem` [WebDownloader, YoutubeDownloader]
diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs
index 05b5623a6..9a48ad173 100644
--- a/Types/GitConfig.hs
+++ b/Types/GitConfig.hs
@@ -67,7 +67,7 @@ data GitConfig = GitConfig
, annexSyncContent :: Configurable Bool
, annexDebug :: Bool
, annexWebOptions :: [String]
- , annexQuviOptions :: [String]
+ , annexYoutubeDlOptions :: [String]
, annexAriaTorrentOptions :: [String]
, annexWebDownloadCommand :: Maybe String
, annexCrippledFileSystem :: Bool
@@ -127,7 +127,7 @@ extractGitConfig r = GitConfig
getmaybebool (annex "synccontent")
, annexDebug = getbool (annex "debug") False
, annexWebOptions = getwords (annex "web-options")
- , annexQuviOptions = getwords (annex "quvi-options")
+ , annexYoutubeDlOptions = getwords (annex "youtube-dl-options")
, annexAriaTorrentOptions = getwords (annex "aria-torrent-options")
, annexWebDownloadCommand = getmaybe (annex "web-download-command")
, annexCrippledFileSystem = getbool (annex "crippledfilesystem") False
diff --git a/Types/Transfer.hs b/Types/Transfer.hs
index ade8fc763..1b54f8511 100644
--- a/Types/Transfer.hs
+++ b/Types/Transfer.hs
@@ -5,9 +5,12 @@
- Licensed under the GNU GPL version 3 or higher.
-}
+{-# LANGUAGE FlexibleInstances #-}
+
module Types.Transfer where
import Types
+import Types.Remote (Verification(..))
import Utility.PID
import Utility.QuickCheck
@@ -66,3 +69,25 @@ instance Arbitrary TransferInfo where
-- associated file cannot be empty (but can be Nothing)
<*> (AssociatedFile <$> arbitrary `suchThat` (/= Just ""))
<*> arbitrary
+
+class Observable a where
+ observeBool :: a -> Bool
+ observeFailure :: a
+
+instance Observable Bool where
+ observeBool = id
+ observeFailure = False
+
+instance Observable (Bool, Verification) where
+ observeBool = fst
+ observeFailure = (False, UnVerified)
+
+instance Observable (Either e Bool) where
+ observeBool (Left _) = False
+ observeBool (Right b) = b
+ observeFailure = Right False
+
+instance Observable (Maybe a) where
+ observeBool (Just _) = True
+ observeBool Nothing = False
+ observeFailure = Nothing
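
The class lets callers test whether an action succeeded without caring about its concrete result type (Bool, (Bool, Verification), Either, or Maybe). A hypothetical retry wrapper (not part of the patch) illustrates the idea; the class and two of its instances are repeated from the hunk above so the sketch compiles on its own:

```haskell
class Observable a where
    observeBool :: a -> Bool
    observeFailure :: a

instance Observable Bool where
    observeBool = id
    observeFailure = False

instance Observable (Maybe a) where
    observeBool (Just _) = True
    observeBool Nothing = False
    observeFailure = Nothing

-- Run an action, retrying it once if its result observes as a failure.
retryOnce :: (Monad m, Observable a) => m a -> m a
retryOnce a = do
    r <- a
    if observeBool r then return r else a

main :: IO ()
main = print =<< retryOnce (putStrLn "trying" >> return (Nothing :: Maybe Int))
-- prints "trying" twice, then Nothing
```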
diff --git a/Utility/HtmlDetect.hs b/Utility/HtmlDetect.hs
new file mode 100644
index 000000000..57a56c95f
--- /dev/null
+++ b/Utility/HtmlDetect.hs
@@ -0,0 +1,35 @@
+{- html detection
+ -
+ - Copyright 2017 Joey Hess <id@joeyh.name>
+ -
+ - License: BSD-2-clause
+ -}
+
+module Utility.HtmlDetect where
+
+import Text.HTML.TagSoup
+import Data.Char
+
+-- | Detect if a string is a html document.
+--
+-- The document may not be valid, and will still be detected as html,
+-- as long as it starts with a "<html>" or "<!DOCTYPE html>" tag.
+--
+-- Html fragments like "<p>this</p>" are not detected as being html,
+-- although some browsers may choose to render them as html.
+isHtml :: String -> Bool
+isHtml = evaluate . canonicalizeTags . parseTags . shorten
+ where
+ -- We only care about the beginning of the file,
+ -- so although tagsoup parses lazily anyway, truncate it.
+ shorten = take 16384
+ evaluate (TagOpen "!DOCTYPE" ((t, _):_):_) = map toLower t == "html"
+ evaluate (TagOpen "html" _:_) = True
+ -- Allow some leading whitespace before the tag.
+ evaluate (TagText t:rest)
+ | all isSpace t = evaluate rest
+ | otherwise = False
+ -- It would be pretty weird to have a html comment before the html
+ -- tag, but easy to allow for.
+ evaluate (TagComment _:rest) = evaluate rest
+ evaluate _ = False
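
A quick usage sketch for the new module, assuming it is compiled inside the git-annex tree with the tagsoup dependency added later in this commit:

```haskell
import Utility.HtmlDetect (isHtml)

main :: IO ()
main = do
    print (isHtml "<!DOCTYPE html><html><head></head></html>")    -- True
    print (isHtml "  \n<html lang=\"en\"><body>hi</body></html>") -- True
    print (isHtml "<p>just a fragment</p>")                       -- False
    print (isHtml "plain text")                                   -- False
```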
diff --git a/Utility/Quvi.hs b/Utility/Quvi.hs
deleted file mode 100644
index ff1ad854c..000000000
--- a/Utility/Quvi.hs
+++ /dev/null
@@ -1,162 +0,0 @@
-{- querying quvi (import qualified)
- -
- - Copyright 2013 Joey Hess <id@joeyh.name>
- -
- - License: BSD-2-clause
- -}
-
-{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE FlexibleContexts #-}
-
-module Utility.Quvi where
-
-import Common
-import Utility.Url
-
-import Data.Aeson
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Lazy as BL
-import qualified Data.Map as M
-import Network.URI (uriAuthority, uriRegName)
-import Data.Char
-
-data QuviVersion
- = Quvi04
- | Quvi09
- | NoQuvi
- deriving (Show)
-
-data Page = Page
- { pageTitle :: String
- , pageLinks :: [Link]
- } deriving (Show)
-
-data Link = Link
- { linkSuffix :: Maybe String
- , linkUrl :: URLString
- } deriving (Show)
-
-{- JSON instances for quvi 0.4. -}
-instance FromJSON Page where
- parseJSON (Object v) = Page
- <$> v .: "page_title"
- <*> v .: "link"
- parseJSON _ = mzero
-
-instance FromJSON Link where
- parseJSON (Object v) = Link
- <$> v .:? "file_suffix"
- <*> v .: "url"
- parseJSON _ = mzero
-
-{- "enum" format used by quvi 0.9 -}
-parseEnum :: String -> Maybe Page
-parseEnum s = Page
- <$> get "QUVI_MEDIA_PROPERTY_TITLE"
- <*> ((:[]) <$>
- ( Link
- <$> Just <$> (get "QUVI_MEDIA_STREAM_PROPERTY_CONTAINER")
- <*> get "QUVI_MEDIA_STREAM_PROPERTY_URL"
- )
- )
- where
- get = flip M.lookup m
- m = M.fromList $ map (separate (== '=')) $ lines s
-
-probeVersion :: IO QuviVersion
-probeVersion = catchDefaultIO NoQuvi $
- examine <$> processTranscript "quvi" ["--version"] Nothing
- where
- examine (s, True)
- | "quvi v0.4" `isInfixOf` s = Quvi04
- | otherwise = Quvi09
- examine _ = NoQuvi
-
-type Query a = QuviVersion -> [CommandParam] -> URLString -> IO a
-
-{- Throws an error when quvi is not installed. -}
-forceQuery :: Query (Maybe Page)
-forceQuery v ps url = query' v ps url `catchNonAsync` onerr
- where
- onerr e = ifM (inPath "quvi")
- ( giveup ("quvi failed: " ++ show e)
- , giveup "quvi is not installed"
- )
-
-{- Returns Nothing if the page is not a video page, or quvi is not
- - installed. -}
-query :: Query (Maybe Page)
-query v ps url = flip catchNonAsync (const $ return Nothing) (query' v ps url)
-
-query' :: Query (Maybe Page)
-query' Quvi09 ps url = parseEnum
- <$> readQuvi (toCommand $ [Param "dump", Param "-p", Param "enum"] ++ ps ++ [Param url])
-query' Quvi04 ps url = do
- let p = proc "quvi" (toCommand $ ps ++ [Param url])
- decode . BL.fromStrict
- <$> withHandle StdoutHandle createProcessSuccess p B.hGetContents
-query' NoQuvi _ _ = return Nothing
-
-queryLinks :: Query [URLString]
-queryLinks v ps url = maybe [] (map linkUrl . pageLinks) <$> query v ps url
-
-{- Checks if quvi can still find a download link for an url.
- - If quvi is not installed, returns False. -}
-check :: Query Bool
-check v ps url = maybe False (not . null . pageLinks) <$> query v ps url
-
-{- Checks if an url is supported by quvi, as quickly as possible
- - (without hitting it if possible), and without outputting
- - anything. Also returns False if quvi is not installed. -}
-supported :: QuviVersion -> URLString -> IO Bool
-supported NoQuvi _ = return False
-supported Quvi04 url = boolSystem "quvi"
- [ Param "--verbosity", Param "mute"
- , Param "--support"
- , Param url
- ]
-{- Use quvi-info to see if the url's domain is supported.
- - If so, have to do a online verification of the url. -}
-supported Quvi09 url = (firstlevel <&&> secondlevel)
- `catchNonAsync` (\_ -> return False)
- where
- firstlevel = case uriAuthority =<< parseURIRelaxed url of
- Nothing -> return False
- Just auth -> do
- let domain = map toLower $ uriRegName auth
- let basedomain = intercalate "." $ reverse $ take 2 $ reverse $ splitc '.' domain
- any (\h -> domain `isSuffixOf` h || basedomain `isSuffixOf` h)
- . map (map toLower) <$> listdomains Quvi09
- secondlevel = snd <$> processTranscript "quvi"
- (toCommand [Param "dump", Param "-o", Param url]) Nothing
-
-listdomains :: QuviVersion -> IO [String]
-listdomains Quvi09 = concatMap (splitc ',')
- . concatMap (drop 1 . words)
- . filter ("domains: " `isPrefixOf`) . lines
- <$> readQuvi (toCommand [Param "info", Param "-p", Param "domains"])
-listdomains _ = return []
-
-type QuviParams = QuviVersion -> [CommandParam]
-
-{- Disables progress, but not information output. -}
-quiet :: QuviParams
--- Cannot use quiet as it now disables informational output.
--- No way to disable progress.
-quiet Quvi09 = [Param "--verbosity", Param "verbose"]
-quiet Quvi04 = [Param "--verbosity", Param "quiet"]
-quiet NoQuvi = []
-
-{- Only return http results, not streaming protocols. -}
-httponly :: QuviParams
--- No way to do it with 0.9?
-httponly Quvi04 = [Param "-c", Param "http"]
-httponly _ = [] -- No way to do it with 0.9?
-
-readQuvi :: [String] -> IO String
-readQuvi ps = withHandle StdoutHandle createProcessSuccess p $ \h -> do
- r <- hGetContentsStrict h
- hClose h
- return r
- where
- p = proc "quvi" ps
diff --git a/debian/control b/debian/control
index b34a79002..a20506002 100644
--- a/debian/control
+++ b/debian/control
@@ -25,6 +25,7 @@ Build-Depends:
libghc-dlist-dev,
libghc-uuid-dev,
libghc-aeson-dev,
+ libghc-tagsoup-dev,
libghc-unordered-containers-dev,
libghc-ifelse-dev,
libghc-bloomfilter-dev,
@@ -106,7 +107,7 @@ Recommends:
lsof,
gnupg,
bind9-host,
- quvi,
+ youtube-dl,
git-remote-gcrypt (>= 0.20130908-6),
nocache,
aria2,
diff --git a/doc/devblog/youtube-dl_day_3.mdwn b/doc/devblog/youtube-dl_day_3.mdwn
new file mode 100644
index 000000000..bc868d25c
--- /dev/null
+++ b/doc/devblog/youtube-dl_day_3.mdwn
@@ -0,0 +1,7 @@
+Finished up youtube-dl integration today, including all the edge cases in
+`addurl` and honoring annex.diskreserve.
+
+I changed my mind about `git annex addurl --relaxed`; it seems better for
+it to be slower than before, but not have surprising behavior, than to be
+fast but potentially surprising. If it's too slow, add `--raw` to avoid
+using youtube-dl.
diff --git a/doc/forum/downloading_mp4_by_default_with_importfeed/comment_1_c9e5a3700764faa33d2c68de6a8236dc._comment b/doc/forum/downloading_mp4_by_default_with_importfeed/comment_1_c9e5a3700764faa33d2c68de6a8236dc._comment
index 23282e835..07691d823 100644
--- a/doc/forum/downloading_mp4_by_default_with_importfeed/comment_1_c9e5a3700764faa33d2c68de6a8236dc._comment
+++ b/doc/forum/downloading_mp4_by_default_with_importfeed/comment_1_c9e5a3700764faa33d2c68de6a8236dc._comment
@@ -8,4 +8,8 @@ option to make it prefer mp4, you can pass it using the annex.quvi-options
git configuration setting. Or maybe it has a config file. I don't
know how to configure quvi to do that, but if it's possible to, you should
be able to get git-annex to run it with that configuration.
+
+Update: Now git-annex uses youtube-dl, so you can use the
+annex.youtube-dl-options git configuration setting to configure youtube-dl
+to download your preferred format.
"""]]
diff --git a/doc/git-annex-addurl.mdwn b/doc/git-annex-addurl.mdwn
index aace42009..a43976b56 100644
--- a/doc/git-annex-addurl.mdwn
+++ b/doc/git-annex-addurl.mdwn
@@ -10,8 +10,8 @@ git annex addurl `[url ...]`
Downloads each url to its own file, which is added to the annex.
-When `quvi` is installed, urls are automatically tested to see if they
-point to a video hosting site, and the video is downloaded instead.
+When `youtube-dl` is installed, it's used to check for a video embedded in
+a web page at the url, and that is added to the annex instead.
Urls to torrent files (including magnet links) will cause the content of
the torrent to be downloaded, using `aria2c`.
@@ -30,12 +30,17 @@ be used to get better filenames.
* `--relaxed`
- Avoid storing the size of the url's content, and accept whatever
- content is there at a future point. (Implies `--fast`.)
+ Don't immediately download the url, and avoid storing the size of the
+ url's content. This makes git-annex accept whatever content is there
+ at a future point.
+ This is the fastest option, but it still has to access the network
+ to check if the url contains embedded media. When adding large numbers
+ of urls, using `--relaxed --raw` is much faster.
+
* `--raw`
- Prevent special handling of urls by quvi, bittorrent, and other
+ Prevent special handling of urls by youtube-dl, bittorrent, and other
special remotes. This will for example, make addurl
download the .torrent file and not the contents it points to.
diff --git a/doc/git-annex-importfeed.mdwn b/doc/git-annex-importfeed.mdwn
index 241d369af..2f146fbfe 100644
--- a/doc/git-annex-importfeed.mdwn
+++ b/doc/git-annex-importfeed.mdwn
@@ -8,14 +8,13 @@ git annex importfeed `[url ...]`
# DESCRIPTION
-Imports the contents of podcast feeds. Only downloads files whose
+Imports the contents of podcasts and other feeds. Only downloads files whose
content has not already been added to the repository before, so you can
delete, rename, etc the resulting files and repeated runs won't duplicate
them.
-When quvi is installed, links in the feed are tested to see if they
-are on a video hosting site, and the video is downloaded. This allows
-importing e.g., YouTube playlists.
+When `youtube-dl` is installed, it's used to download links in the feed.
+This allows importing e.g., YouTube playlists.
To make the import process add metadata to the imported files from the feed,
`git config annex.genmetadata true`
@@ -38,6 +37,23 @@ To make the import process add metadata to the imported files from the feed,
These options behave the same as when using [[git-annex-addurl]](1).
+* `--fast`
+
+ Avoid immediately downloading urls. The url is still checked
+ (via HEAD) to verify that it exists, and to get its size if possible.
+
+* `--relaxed`
+
+ Don't immediately download urls, and avoid storing the size of the
+ url's content. This makes git-annex accept whatever content is there
+ at a future point.
+
+* `--raw`
+
+ Prevent special handling of urls by youtube-dl, bittorrent, and other
+ special remotes. This will for example, make importfeed
+ download a .torrent file and not the contents it points to.
+
# SEE ALSO
[[git-annex]](1)
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index c3aa8b991..8f84f1d9e 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -1315,10 +1315,16 @@ Here are all the supported configuration settings.
Options to pass when running wget or curl.
For example, to force IPv4 only, set it to "-4"
-* `annex.quvi-options`
+* `annex.youtube-dl-options`
- Options to pass to quvi when using it to find the url to download for a
- video.
+ Options to pass to youtube-dl when using it to find the url to download
+ for a video.
+
+ Some options may break git-annex's integration with youtube-dl. For
+ example, the --output option could cause it to store files somewhere
+ git-annex won't find them. Avoid setting here or in the youtube-dl config
+ file any options that cause youtube-dl to download more than one file,
+ or to store the file anywhere other than the current working directory.
* `annex.aria-torrent-options`
diff --git a/doc/tips/downloading_podcasts.mdwn b/doc/tips/downloading_podcasts.mdwn
index 2926c6710..13b8300db 100644
--- a/doc/tips/downloading_podcasts.mdwn
+++ b/doc/tips/downloading_podcasts.mdwn
@@ -73,13 +73,16 @@ and transferring to your laptop on demand.
## youtube playlists
-If your git-annex is also built with quvi support, you can also use
-`git annex importfeed` on youtube playlists. It will automatically download
-the videos linked to by the playlist.
-
-For this you need an rss file containing links to the videos.
-For example, this url currently works:
-<http://gdata.youtube.com/feeds/api/playlists/PLz8ZG1e9MPlzefklz1Gv79icjywTXycR->
+You can also use `git annex importfeed` on youtube playlists.
+It will use [youtube-dl](https://rg3.github.io/youtube-dl/) to automatically
+download the videos linked to by the playlist.
+
+To download a youtube playlist, you need to find the feed associated with that
+playlist, and pass it to `git annex importfeed`. There does not seem to be
+an easy link anywhere to get the feed, but you can construct its url
+manually. For a playlist like
+"https://www.youtube.com/playlist?list=PL4F80C7D2DC8D9B6C", the
+feed is "https://www.youtube.com/feeds/videos.xml?playlist_id=PL4F80C7D2DC8D9B6C"
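
If you script this, a tiny helper (hypothetical, not part of git-annex) can build that feed url from a playlist id:

```haskell
-- Hypothetical helper: turn a youtube playlist id into the RSS feed url
-- that `git annex importfeed` can consume.
feedUrl :: String -> String
feedUrl playlistid = "https://www.youtube.com/feeds/videos.xml?playlist_id=" ++ playlistid

main :: IO ()
main = putStrLn (feedUrl "PL4F80C7D2DC8D9B6C")
-- https://www.youtube.com/feeds/videos.xml?playlist_id=PL4F80C7D2DC8D9B6C
```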
## metadata
diff --git a/doc/tips/using_the_web_as_a_special_remote.mdwn b/doc/tips/using_the_web_as_a_special_remote.mdwn
index 2dc3419ad..8bd9c7457 100644
--- a/doc/tips/using_the_web_as_a_special_remote.mdwn
+++ b/doc/tips/using_the_web_as_a_special_remote.mdwn
@@ -68,41 +68,34 @@ number takes that many paths from the end.
## videos
-<a name=quvi></a>
+<a name=videos></a>
There's support for downloading videos from sites like YouTube, Vimeo,
-and many more. This relies on [quvi](http://quvi.sourceforge.net/) to find
-urls to the actual videos files.
+and many more. This relies on [youtube-dl](https://rg3.github.io/youtube-dl/)
+to download the videos.
-When you have quvi installed, you can just
+When you have youtube-dl installed, you can just
`git annex addurl http://youtube.com/foo` and it will detect that
it is a video and download the video content for offline viewing.
Later, in another clone of the repository, you can run `git annex get` on
-the file and it will also be downloaded with the help of quvi. This works
+the file and it will also be downloaded with youtube-dl. This works
even if the video host has transcoded or otherwise changed the video
in the meantime; the assumption is that these video files are equivalent.
-There is an `annex.quvi-options` configuration setting that can be used
+There is an `annex.youtube-dl-options` configuration setting that can be used
to pass parameters to quvi. For example, you could set `git config
-annex.quvi-options "--format low"` to configure it to download low
-quality videos from YouTube.
-
-Note that for performance reasons, the url is not checked for redirects,
-so some shortened urls will not be detected. You can
-either load the short url in a browser to get the full url, or you
-can force use of quvi with redirect detection, by prepending "quvi:" to the
-url.
-
-To download a youtube playlist, you need to find the feed associated with that
-playlist, and pass it to `git annex importfeed`. There does not seem to be
-an easy link anywhere to get the feed, but you can construct its url
+annex.youtube-dl-options "--format worst"` to configure it to download low
+quality videos from YouTube. Note that the youtube-dl configuration files
+are not read when git-annex runs youtube-dl, to avoid config settings that
+break its integration.
+
+To download a youtube playlist, you need to find the RSS feed associated with
+that playlist, and pass it to `git annex importfeed`. There does not seem to
+be an easy link anywhere to get the RSS feed, but you can construct its url
manually. For a playlist like
"https://www.youtube.com/playlist?list=PL4F80C7D2DC8D9B6C", the
-feed is "http://gdata.youtube.com/feeds/api/playlists/PL4F80C7D2DC8D9B6C"
-
-More details about youtube feeds at <http://googlesystem.blogspot.com/2008/01/youtube-feeds.html>
--- `git-annex importfeed` should handle all of them.
+feed is "https://www.youtube.com/feeds/videos.xml?playlist_id=PL4F80C7D2DC8D9B6C"
## bittorrent
diff --git a/doc/todo/switch_from_quvi_to_youtube-dl.mdwn b/doc/todo/switch_from_quvi_to_youtube-dl.mdwn
index cfdd8a8a6..3fa7b0ccb 100644
--- a/doc/todo/switch_from_quvi_to_youtube-dl.mdwn
+++ b/doc/todo/switch_from_quvi_to_youtube-dl.mdwn
@@ -16,21 +16,43 @@ urls, see for example http://bugs.debian.org/874321)
So, switching to youtube-dl would probably need a new switch, like `git
annex addurl --rip` that enables using it.
-Currently `git annex importfeed` automatically tests for video urls with
-quvi; it would also need to support `--rip`.
+(Importfeed only treats links in the feed as video urls, not enclosures,
+so this problem does not affect it and it would not need a new switch.)
-Both of those changes would need changes to user's workflows and cron jobs.
-git-annex could keep supporting quvi for some time, and warn when it uses
-quvi, to help with the transition.
+That would need changes to users' workflows. git-annex could keep
+supporting quvi for some time, and warn when it uses quvi, to
+help with the transition.
+
+> Alternatively, git-annex addurl could download the url first, and then
+> check the file to see if it looks like html. If so, run youtube-dl (which
+> unfortunately has to download it again) and see if it manages to rip
+> media from it. This way, addurl of non-html files does not have extra
+> overhead, and the redundant download is fairly small compared to ripping
+> the media. Only the unusual case where addurl is being used on html that
+> does not contain media becomes more expensive.
+>
+> However, for --relaxed, running youtube-dl --get-filename would be
+> significantly more expensive since it hits the network. It seems that
+> --relaxed would need to change to not rip videos; users who want that
+> could use --fast.
+>
+> --fast already hits the network, but
+> if it uses youtube-dl --get-filename, it would fall afoul of
+> bugs like <http://bugs.debian.org/874321>, although those can be worked
+> around (/dev/null stderr in case youtube-dl crashes)
Another gotcha is playlists. youtube-dl downloads playlists automatically.
But, git-annex needs to record an url that downloads a single file so that
`git annex get` works right. So, playlists will need to be disabled when
git-annex runs youtube-dl. But, `--no-playlist` does not always disable
-playlists. Best option seems to be `--playlist-items 0` which works for
+playlists. Best option seems to be `--no-playlist --playlist-items 0` which works for
non-playlists, and downloads only 1 item from playlists (hopefully a fairly
stable item, but who knows..).
+(`git annex importfeed` handles youtube playlist downloads, but needs the
+user to find the url to the rss feed for the playlist. Youtube still has
+these, although it makes them hard to find.)
+
Another gotcha is that youtube-dl's -o option does not fully determine the
filename it downloads to. Sometimes it will tack on an additional extension
(seen with youtube videos where it added a ".mkv").
@@ -38,3 +60,12 @@ And --get-filename does not report the actual filename when that happens.
This seems to be due to format merging by ffmpeg; with -f best, it does
not merge and so does not do that.
<https://github.com/rg3/youtube-dl/issues/14864>
+
+Disk free space checking will need a different technique than the one
+git-annex normally uses, because youtube-dl does not provide an easy way to
+query for size. Could use --dump-json, but that would require downloading
+the web page yet again, which is too expensive; and the json seems to have
+"filesize: null" for youtube videos. What does work is the --max-filesize
+option, which makes youtube-dl abort if it's too big.
+
+> [[done]] --[[Joey]]
diff --git a/git-annex.cabal b/git-annex.cabal
index 5d46caed3..4e2269a51 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -347,6 +347,7 @@ Executable git-annex
persistent,
persistent-template,
aeson,
+ tagsoup,
unordered-containers,
feed (>= 0.3.9),
regex-tdfa,
@@ -530,7 +531,6 @@ Executable git-annex
Annex.Path
Annex.Perms
Annex.Queue
- Annex.Quvi
Annex.ReplaceFile
Annex.SpecialRemote
Annex.Ssh
@@ -546,6 +546,7 @@ Executable git-annex
Annex.View.ViewedFile
Annex.Wanted
Annex.WorkTree
+ Annex.YoutubeDl
Assistant
Assistant.Alert
Assistant.Alert.Utility
@@ -1001,6 +1002,7 @@ Executable git-annex
Utility.Glob
Utility.Gpg
Utility.Hash
+ Utility.HtmlDetect
Utility.HumanNumber
Utility.HumanTime
Utility.InodeCache
@@ -1032,7 +1034,6 @@ Executable git-annex
Utility.Process
Utility.Process.Shim
Utility.QuickCheck
- Utility.Quvi
Utility.Rsync
Utility.SRV
Utility.SafeCommand