From 9ec6bdfb526fa6b75a264b6417b24aa7f01adc25 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Dec 2017 13:16:06 -0400 Subject: fix regression in addurl --file caused by youtube-dl support Now youtubeDlCheck downloads the beginning of the url's content and checks if it's html, only when it is does it pass it off the youtube-dl to check if it supports it. This means more work is done for urls that youtube-dl does support, but is probably more efficient for other urls, since it only downloads the first chunk of content, while youtube-dl probably downloads more. As well as the reported bug, this also fixes behavior when an url was added with youtube-dl, but the url content has now changed from a html page to something else. Remote.Web.checkKey used to wrongly succeed in that situation, since youtube-dl said sure it can download that something else. This commit was supported by the NSF-funded DataLad project. --- Annex/YoutubeDl.hs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'Annex') diff --git a/Annex/YoutubeDl.hs b/Annex/YoutubeDl.hs index 4a820cede..071ab1e93 100644 --- a/Annex/YoutubeDl.hs +++ b/Annex/YoutubeDl.hs @@ -10,8 +10,10 @@ module Annex.YoutubeDl where import Annex.Common import qualified Annex import Annex.Content +import Annex.Url import Utility.Url (URLString) import Utility.DiskFree +import Utility.HtmlDetect import Logs.Transfer -- Runs youtube-dl in a work directory, to download a single media file @@ -108,10 +110,19 @@ youtubeDlSupported :: URLString -> Annex Bool youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url -- Check if youtube-dl can find media in an url. +-- +-- youtube-dl supports downloading urls that are not html pages, +-- but we don't want to use it for such urls, since they can be downloaded +-- without it. So, this first downloads part of the content and checks +-- if it's a html page; only then is youtube-dl used. youtubeDlCheck :: URLString -> Annex (Either String Bool) youtubeDlCheck url = catchMsgIO $ do - opts <- youtubeDlOpts [ Param url, Param "--simulate" ] - liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing + uo <- getUrlOptions + liftIO (downloadPartial url uo htmlPrefixLength) >>= \case + Just bs | isHtmlBs bs -> do + opts <- youtubeDlOpts [ Param url, Param "--simulate" ] + liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing + _ -> return False -- Ask youtube-dl for the filename of media in an url. -- -- cgit v1.2.3