From febfa1d66a5cfa52d2c2f5455061f8e04d15a455 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Dec 2017 14:49:55 -0400 Subject: fix regression in addurl --fast caused by youtube-dl support Similar to 9ec6bdfb526fa6b75a264b6417b24aa7f01adc25 but another code path. As well as using youtube-dl unecessarily, it used the filename it comes up with, which while nice for youtube videos, is not right for other files. This means more work is done for urls that youtube-dl does support, but is probably more efficient for other urls, since it only downloads the first chunk of content, while youtube-dl probably downloads more. This commit was supported by the NSF-funded DataLad project. --- Annex/YoutubeDl.hs | 57 ++++++++++++---------- ...ent_2_1cebe1385d2b8a244afc259a78f72ada._comment | 6 +++ ...ent_4_f05e1047c5d22dddba21a9faa3397f0d._comment | 8 +++ 3 files changed, 46 insertions(+), 25 deletions(-) create mode 100644 doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_2_1cebe1385d2b8a244afc259a78f72ada._comment create mode 100644 doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_4_f05e1047c5d22dddba21a9faa3397f0d._comment diff --git a/Annex/YoutubeDl.hs b/Annex/YoutubeDl.hs index 071ab1e93..3aa3d9704 100644 --- a/Annex/YoutubeDl.hs +++ b/Annex/YoutubeDl.hs @@ -106,42 +106,49 @@ youtubeDlTo key url dest = do return Nothing return (fromMaybe False res) -youtubeDlSupported :: URLString -> Annex Bool -youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url - --- Check if youtube-dl can find media in an url. --- -- youtube-dl supports downloading urls that are not html pages, -- but we don't want to use it for such urls, since they can be downloaded -- without it. So, this first downloads part of the content and checks -- if it's a html page; only then is youtube-dl used. -youtubeDlCheck :: URLString -> Annex (Either String Bool) -youtubeDlCheck url = catchMsgIO $ do +htmlOnly :: URLString -> a -> Annex a -> Annex a +htmlOnly url fallback a = do uo <- getUrlOptions liftIO (downloadPartial url uo htmlPrefixLength) >>= \case - Just bs | isHtmlBs bs -> do - opts <- youtubeDlOpts [ Param url, Param "--simulate" ] - liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing - _ -> return False + Just bs | isHtmlBs bs -> a + _ -> return fallback + +youtubeDlSupported :: URLString -> Annex Bool +youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url + +-- Check if youtube-dl can find media in an url. +youtubeDlCheck :: URLString -> Annex (Either String Bool) +youtubeDlCheck url = catchMsgIO $ htmlOnly url False $ do + opts <- youtubeDlOpts [ Param url, Param "--simulate" ] + liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing -- Ask youtube-dl for the filename of media in an url. -- -- (This is not always identical to the filename it uses when downloading.) youtubeDlFileName :: URLString -> Annex (Either String FilePath) -youtubeDlFileName url = flip catchIO (pure . Left . show) $ do - -- Sometimes youtube-dl will fail with an ugly backtrace - -- (eg, http://bugs.debian.org/874321) - -- so catch stderr as well as stdout to avoid the user seeing it. - -- --no-warnings avoids warning messages that are output to stdout. - opts <- youtubeDlOpts - [ Param url - , Param "--get-filename" - , Param "--no-warnings" - ] - (output, ok) <- liftIO $ processTranscript "youtube-dl" (toCommand opts) Nothing - return $ case (ok, lines output) of - (True, (f:_)) | not (null f) -> Right f - _ -> Left "no media in url" +youtubeDlFileName url = flip catchIO (pure . Left . show) $ + htmlOnly url nomedia $ do + -- Sometimes youtube-dl will fail with an ugly backtrace + -- (eg, http://bugs.debian.org/874321) + -- so catch stderr as well as stdout to avoid the user + -- seeing it. --no-warnings avoids warning messages that + -- are output to stdout. + opts <- youtubeDlOpts + [ Param url + , Param "--get-filename" + , Param "--no-warnings" + ] + (output, ok) <- liftIO $ processTranscript "youtube-dl" + (toCommand opts) Nothing + return $ case (ok, lines output) of + (True, (f:_)) | not (null f) -> Right f + _ -> nomedia + where + nomedia = Left "no media in url" youtubeDlOpts :: [CommandParam] -> Annex [CommandParam] youtubeDlOpts addopts = do diff --git a/doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_2_1cebe1385d2b8a244afc259a78f72ada._comment b/doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_2_1cebe1385d2b8a244afc259a78f72ada._comment new file mode 100644 index 000000000..624d094fd --- /dev/null +++ b/doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_2_1cebe1385d2b8a244afc259a78f72ada._comment @@ -0,0 +1,6 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2017-12-06T16:02:55Z" + content=""" +"""]] diff --git a/doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_4_f05e1047c5d22dddba21a9faa3397f0d._comment b/doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_4_f05e1047c5d22dddba21a9faa3397f0d._comment new file mode 100644 index 000000000..b2aec7da1 --- /dev/null +++ b/doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_4_f05e1047c5d22dddba21a9faa3397f0d._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2017-12-08T18:48:46Z" + content=""" +That one happened with `git annex addurl --fast $url` so a different code +path. Had to add a html page check to youtubeDlFileName to fix it. +"""]] -- cgit v1.2.3