diff options
Diffstat (limited to 'Utility/Url.hs')
-rw-r--r-- | Utility/Url.hs | 125 |
1 files changed, 51 insertions, 74 deletions
diff --git a/Utility/Url.hs b/Utility/Url.hs index 4137a5d8b..ebcae55ca 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -1,6 +1,6 @@ {- Url downloading. - - - Copyright 2011,2013 Joey Hess <joey@kitenet.net> + - Copyright 2011-2014 Joey Hess <joey@kitenet.net> - - License: BSD-2-clause -} @@ -21,10 +21,11 @@ module Utility.Url ( import Common import Network.URI -import qualified Network.Browser as Browser -import Network.HTTP -import Data.Either +import Network.HTTP.Conduit +import Network.HTTP.Types import Data.Default +import qualified Data.CaseInsensitive as CI +import qualified Data.ByteString.UTF8 as B8 import qualified Build.SysConfig @@ -60,33 +61,26 @@ check url expected_size = go <$$> exists url Nothing -> (True, True) {- Checks that an url exists and could be successfully downloaded, - - also returning its size if available. - - - - For a file: url, check it directly. - - - - Uses curl otherwise, when available, since curl handles https better - - than does Haskell's Network.Browser. - -} + - also returning its size if available. -} exists :: URLString -> UrlOptions -> IO (Bool, Maybe Integer) exists url uo = case parseURIRelaxed url of - Just u - | uriScheme u == "file:" -> do - s <- catchMaybeIO $ getFileStatus (unEscapeString $ uriPath u) - case s of - Just stat -> return (True, Just $ fromIntegral $ fileSize stat) - Nothing -> dne - | otherwise -> if Build.SysConfig.curl - then do + Just u -> case parseUrl (show u) of + Just req -> existsconduit req `catchNonAsync` const dne + -- http-conduit does not support file:, ftp:, etc urls, + -- so fall back to reading files and using curl. + Nothing + | uriScheme u == "file:" -> do + s <- catchMaybeIO $ getFileStatus (unEscapeString $ uriPath u) + case s of + Just stat -> return (True, Just $ fromIntegral $ fileSize stat) + Nothing -> dne + | Build.SysConfig.curl -> do output <- catchDefaultIO "" $ readProcess "curl" $ toCommand curlparams case lastMaybe (lines output) of - Just ('2':_:_) -> return (True, extractsize output) + Just ('2':_:_) -> return (True, extractlencurl output) _ -> dne - else do - r <- request u HEAD uo - case rspCode r of - (2,_,_) -> return (True, size r) - _ -> return (False, Nothing) + | otherwise -> dne Nothing -> dne where dne = return (False, Nothing) @@ -98,13 +92,28 @@ exists url uo = case parseURIRelaxed url of , Param "-w", Param "%{http_code}" ] ++ concatMap (\h -> [Param "-H", Param h]) (reqHeaders uo) ++ (reqParams uo) - extractsize s = case lastMaybe $ filter ("Content-Length:" `isPrefixOf`) (lines s) of + extractlencurl s = case lastMaybe $ filter ("Content-Length:" `isPrefixOf`) (lines s) of Just l -> case lastMaybe $ words l of Just sz -> readish sz _ -> Nothing _ -> Nothing - - size = liftM Prelude.read . lookupHeader HdrContentLength . rspHeaders + + extractlen resp = readish . B8.toString =<< headMaybe lenheaders + where + lenheaders = map snd $ + filter (\(h, _) -> h == hContentLength) + (responseHeaders resp) + + existsconduit req = withManager $ \mgr -> do + let req' = (addUrlOptions uo req) { method = methodHead } + resp <- http req' mgr + -- forces processing the response before the + -- manager is closed + ret <- if responseStatus resp == ok200 + then return (True, extractlen resp) + else liftIO dne + liftIO $ closeManager mgr + return ret -- works for both wget and curl commands addUserAgent :: UrlOptions -> [CommandParam] -> [CommandParam] @@ -112,6 +121,20 @@ addUserAgent uo ps = case userAgent uo of Nothing -> ps Just ua -> ps ++ [Param "--user-agent", Param ua] +addUrlOptions :: UrlOptions -> Request -> Request +addUrlOptions uo r = r { requestHeaders = requestHeaders r ++ uaheader ++ otherheaders} + where + uaheader = case userAgent uo of + Nothing -> [] + Just ua -> [(hUserAgent, B8.fromString ua)] + otherheaders = map toheader (reqHeaders uo) + toheader s = + let (h, v) = separate (== ':') s + h' = CI.mk (B8.fromString h) + in case v of + (' ':v') -> (h', B8.fromString v') + _ -> (h', B8.fromString v) + {- Used to download large files, such as the contents of keys. - - Uses wget or curl program for its progress bar. (Wget has a better one, @@ -161,52 +184,6 @@ download' quiet url file uo = | quiet = [Param s] | otherwise = [] -{- Uses Network.Browser to make a http request of an url. - - For example, HEAD can be used to check if the url exists, - - or GET used to get the url content (best for small urls). - - - - This does its own redirect following because Browser's is buggy for HEAD - - requests. - - - - Unfortunately, does not handle https, so should only be used - - when curl is not available. - -} -request :: URI -> RequestMethod -> UrlOptions -> IO (Response String) -request url requesttype uo = go 5 url - where - go :: Int -> URI -> IO (Response String) - go 0 _ = error "Too many redirects " - go n u = do - rsp <- Browser.browse $ do - maybe noop Browser.setUserAgent (userAgent uo) - Browser.setErrHandler ignore - Browser.setOutHandler ignore - Browser.setAllowRedirects False - let req = mkRequest requesttype u :: Request_String - snd <$> Browser.request (addheaders req) - case rspCode rsp of - (3,0,x) | x /= 5 -> redir (n - 1) u rsp - _ -> return rsp - addheaders req = setHeaders req (rqHeaders req ++ userheaders) - userheaders = rights $ map parseHeader (reqHeaders uo) - ignore = const noop - redir n u rsp = case retrieveHeaders HdrLocation rsp of - [] -> return rsp - (Header _ newu:_) -> - case parseURIReference newu of - Nothing -> return rsp - Just newURI -> go n $ -#if defined VERSION_network -#if ! MIN_VERSION_network(2,4,0) -#define WITH_OLD_URI -#endif -#endif -#ifdef WITH_OLD_URI - fromMaybe newURI (newURI `relativeTo` u) -#else - newURI `relativeTo` u -#endif - {- Allows for spaces and other stuff in urls, properly escaping them. -} parseURIRelaxed :: URLString -> Maybe URI parseURIRelaxed = parseURI . escapeURIString isAllowedInURI |