summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2017-12-08 14:49:55 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2017-12-08 14:51:17 -0400
commitfebfa1d66a5cfa52d2c2f5455061f8e04d15a455 (patch)
tree4ba9610026cd6d4524774c576fd7fb59644a0499
parent4f0f992de990476ebd3b2be3d03a8a04a0b42243 (diff)
fix regression in addurl --fast caused by youtube-dl support
Similar to 9ec6bdfb526fa6b75a264b6417b24aa7f01adc25 but another code path. As well as using youtube-dl unecessarily, it used the filename it comes up with, which while nice for youtube videos, is not right for other files. This means more work is done for urls that youtube-dl does support, but is probably more efficient for other urls, since it only downloads the first chunk of content, while youtube-dl probably downloads more. This commit was supported by the NSF-funded DataLad project.
-rw-r--r--Annex/YoutubeDl.hs57
-rw-r--r--doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_2_1cebe1385d2b8a244afc259a78f72ada._comment6
-rw-r--r--doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_4_f05e1047c5d22dddba21a9faa3397f0d._comment8
3 files changed, 46 insertions, 25 deletions
diff --git a/Annex/YoutubeDl.hs b/Annex/YoutubeDl.hs
index 071ab1e93..3aa3d9704 100644
--- a/Annex/YoutubeDl.hs
+++ b/Annex/YoutubeDl.hs
@@ -106,42 +106,49 @@ youtubeDlTo key url dest = do
return Nothing
return (fromMaybe False res)
-youtubeDlSupported :: URLString -> Annex Bool
-youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url
-
--- Check if youtube-dl can find media in an url.
---
-- youtube-dl supports downloading urls that are not html pages,
-- but we don't want to use it for such urls, since they can be downloaded
-- without it. So, this first downloads part of the content and checks
-- if it's a html page; only then is youtube-dl used.
-youtubeDlCheck :: URLString -> Annex (Either String Bool)
-youtubeDlCheck url = catchMsgIO $ do
+htmlOnly :: URLString -> a -> Annex a -> Annex a
+htmlOnly url fallback a = do
uo <- getUrlOptions
liftIO (downloadPartial url uo htmlPrefixLength) >>= \case
- Just bs | isHtmlBs bs -> do
- opts <- youtubeDlOpts [ Param url, Param "--simulate" ]
- liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing
- _ -> return False
+ Just bs | isHtmlBs bs -> a
+ _ -> return fallback
+
+youtubeDlSupported :: URLString -> Annex Bool
+youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url
+
+-- Check if youtube-dl can find media in an url.
+youtubeDlCheck :: URLString -> Annex (Either String Bool)
+youtubeDlCheck url = catchMsgIO $ htmlOnly url False $ do
+ opts <- youtubeDlOpts [ Param url, Param "--simulate" ]
+ liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing
-- Ask youtube-dl for the filename of media in an url.
--
-- (This is not always identical to the filename it uses when downloading.)
youtubeDlFileName :: URLString -> Annex (Either String FilePath)
-youtubeDlFileName url = flip catchIO (pure . Left . show) $ do
- -- Sometimes youtube-dl will fail with an ugly backtrace
- -- (eg, http://bugs.debian.org/874321)
- -- so catch stderr as well as stdout to avoid the user seeing it.
- -- --no-warnings avoids warning messages that are output to stdout.
- opts <- youtubeDlOpts
- [ Param url
- , Param "--get-filename"
- , Param "--no-warnings"
- ]
- (output, ok) <- liftIO $ processTranscript "youtube-dl" (toCommand opts) Nothing
- return $ case (ok, lines output) of
- (True, (f:_)) | not (null f) -> Right f
- _ -> Left "no media in url"
+youtubeDlFileName url = flip catchIO (pure . Left . show) $
+ htmlOnly url nomedia $ do
+ -- Sometimes youtube-dl will fail with an ugly backtrace
+ -- (eg, http://bugs.debian.org/874321)
+ -- so catch stderr as well as stdout to avoid the user
+ -- seeing it. --no-warnings avoids warning messages that
+ -- are output to stdout.
+ opts <- youtubeDlOpts
+ [ Param url
+ , Param "--get-filename"
+ , Param "--no-warnings"
+ ]
+ (output, ok) <- liftIO $ processTranscript "youtube-dl"
+ (toCommand opts) Nothing
+ return $ case (ok, lines output) of
+ (True, (f:_)) | not (null f) -> Right f
+ _ -> nomedia
+ where
+ nomedia = Left "no media in url"
youtubeDlOpts :: [CommandParam] -> Annex [CommandParam]
youtubeDlOpts addopts = do
diff --git a/doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_2_1cebe1385d2b8a244afc259a78f72ada._comment b/doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_2_1cebe1385d2b8a244afc259a78f72ada._comment
new file mode 100644
index 000000000..624d094fd
--- /dev/null
+++ b/doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_2_1cebe1385d2b8a244afc259a78f72ada._comment
@@ -0,0 +1,6 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 2"""
+ date="2017-12-06T16:02:55Z"
+ content="""
+"""]]
diff --git a/doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_4_f05e1047c5d22dddba21a9faa3397f0d._comment b/doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_4_f05e1047c5d22dddba21a9faa3397f0d._comment
new file mode 100644
index 000000000..b2aec7da1
--- /dev/null
+++ b/doc/bugs/regression_-_yt__58___prefix_for___34__regular__34___urls/comment_4_f05e1047c5d22dddba21a9faa3397f0d._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 4"""
+ date="2017-12-08T18:48:46Z"
+ content="""
+That one happened with `git annex addurl --fast $url` so a different code
+path. Had to add a html page check to youtubeDlFileName to fix it.
+"""]]