diff options
-rw-r--r-- | Utility/Url.hs | 148 | ||||
-rw-r--r-- | debian/changelog | 1 | ||||
-rw-r--r-- | debian/control | 2 | ||||
-rw-r--r-- | git-annex.cabal | 9 |
4 files changed, 72 insertions, 88 deletions
diff --git a/Utility/Url.hs b/Utility/Url.hs index 4137a5d8b..073e36821 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -1,6 +1,6 @@ {- Url downloading. - - - Copyright 2011,2013 Joey Hess <joey@kitenet.net> + - Copyright 2011-2014 Joey Hess <joey@kitenet.net> - - License: BSD-2-clause -} @@ -21,10 +21,11 @@ module Utility.Url ( import Common import Network.URI -import qualified Network.Browser as Browser -import Network.HTTP -import Data.Either +import Network.HTTP.Conduit +import Network.HTTP.Types import Data.Default +import qualified Data.CaseInsensitive as CI +import qualified Data.ByteString.UTF8 as B8 import qualified Build.SysConfig @@ -34,16 +35,43 @@ type Headers = [String] type UserAgent = String -data UrlOptions = UrlOptions +data BaseUrlOptions = BaseUrlOptions { userAgent :: Maybe UserAgent , reqHeaders :: Headers , reqParams :: [CommandParam] } -instance Default UrlOptions +instance Default BaseUrlOptions where def = UrlOptions Nothing [] [] +data UrlOptions = UrlOptions + { urlOptions :: BaseUrlOptions + , applyRequest :: Request -> Request + } + +mkUrlOptions :: BaseUrlOptions -> UrlOptions +mkUrlOptions uo = UrlOptions uo applyrequest + where + applyrequest = \r -> r { requestHeaders = requestHeaders r ++ addedheaders } + addedheaders = uaheader ++ otherheaders + uaheader = case userAgent uo of + Nothing -> [] + Just ua -> [(hUserAgent, B8.fromString ua)] + otherheaders = map toheader (reqHeaders uo) + toheader s = + let (h, v) = separate (== ':') s + h' = CI.mk (B8.fromString h) + in case v of + (' ':v') -> (h', B8.fromString v') + _ -> (h', B8.fromString v) + +addUserAgent :: UrlOptions -> [CommandParam] -> [CommandParam] +addUserAgent (UrlOptions uo) ps = case userAgent uo of + Nothing -> ps + -- --user-agent works for both wget and curl commands + Just ua -> ps ++ [Param "--user-agent", Param ua] + {- Checks that an url exists and could be successfully downloaded, - also checking that its size, if available, matches a specified size. -} checkBoth :: URLString -> Maybe Integer -> UrlOptions -> IO Bool @@ -60,33 +88,26 @@ check url expected_size = go <$$> exists url Nothing -> (True, True) {- Checks that an url exists and could be successfully downloaded, - - also returning its size if available. - - - - For a file: url, check it directly. - - - - Uses curl otherwise, when available, since curl handles https better - - than does Haskell's Network.Browser. - -} + - also returning its size if available. -} exists :: URLString -> UrlOptions -> IO (Bool, Maybe Integer) exists url uo = case parseURIRelaxed url of - Just u - | uriScheme u == "file:" -> do - s <- catchMaybeIO $ getFileStatus (unEscapeString $ uriPath u) - case s of - Just stat -> return (True, Just $ fromIntegral $ fileSize stat) - Nothing -> dne - | otherwise -> if Build.SysConfig.curl - then do + Just u -> case parseUrl (show u) of + Just req -> existsconduit req `catchNonAsync` const dne + -- http-conduit does not support file:, ftp:, etc urls, + -- so fall back to reading files and using curl. + Nothing + | uriScheme u == "file:" -> do + s <- catchMaybeIO $ getFileStatus (unEscapeString $ uriPath u) + case s of + Just stat -> return (True, Just $ fromIntegral $ fileSize stat) + Nothing -> dne + | Build.SysConfig.curl -> do output <- catchDefaultIO "" $ readProcess "curl" $ toCommand curlparams case lastMaybe (lines output) of - Just ('2':_:_) -> return (True, extractsize output) + Just ('2':_:_) -> return (True, extractlencurl output) _ -> dne - else do - r <- request u HEAD uo - case rspCode r of - (2,_,_) -> return (True, size r) - _ -> return (False, Nothing) + | otherwise -> dne Nothing -> dne where dne = return (False, Nothing) @@ -98,19 +119,28 @@ exists url uo = case parseURIRelaxed url of , Param "-w", Param "%{http_code}" ] ++ concatMap (\h -> [Param "-H", Param h]) (reqHeaders uo) ++ (reqParams uo) - extractsize s = case lastMaybe $ filter ("Content-Length:" `isPrefixOf`) (lines s) of + extractlencurl s = case lastMaybe $ filter ("Content-Length:" `isPrefixOf`) (lines s) of Just l -> case lastMaybe $ words l of Just sz -> readish sz _ -> Nothing _ -> Nothing - - size = liftM Prelude.read . lookupHeader HdrContentLength . rspHeaders - --- works for both wget and curl commands -addUserAgent :: UrlOptions -> [CommandParam] -> [CommandParam] -addUserAgent uo ps = case userAgent uo of - Nothing -> ps - Just ua -> ps ++ [Param "--user-agent", Param ua] + + extractlen resp = readish . B8.toString =<< headMaybe lenheaders + where + lenheaders = map snd $ + filter (\(h, _) -> h == hContentLength) + (responseHeaders resp) + + existsconduit req = withManager $ \mgr -> do + let req' = (applyRequest uo req) { method = methodHead } + resp <- http req' mgr + -- forces processing the response before the + -- manager is closed + ret <- if responseStatus resp == ok200 + then return (True, extractlen resp) + else liftIO dne + liftIO $ closeManager mgr + return ret {- Used to download large files, such as the contents of keys. - @@ -161,52 +191,6 @@ download' quiet url file uo = | quiet = [Param s] | otherwise = [] -{- Uses Network.Browser to make a http request of an url. - - For example, HEAD can be used to check if the url exists, - - or GET used to get the url content (best for small urls). - - - - This does its own redirect following because Browser's is buggy for HEAD - - requests. - - - - Unfortunately, does not handle https, so should only be used - - when curl is not available. - -} -request :: URI -> RequestMethod -> UrlOptions -> IO (Response String) -request url requesttype uo = go 5 url - where - go :: Int -> URI -> IO (Response String) - go 0 _ = error "Too many redirects " - go n u = do - rsp <- Browser.browse $ do - maybe noop Browser.setUserAgent (userAgent uo) - Browser.setErrHandler ignore - Browser.setOutHandler ignore - Browser.setAllowRedirects False - let req = mkRequest requesttype u :: Request_String - snd <$> Browser.request (addheaders req) - case rspCode rsp of - (3,0,x) | x /= 5 -> redir (n - 1) u rsp - _ -> return rsp - addheaders req = setHeaders req (rqHeaders req ++ userheaders) - userheaders = rights $ map parseHeader (reqHeaders uo) - ignore = const noop - redir n u rsp = case retrieveHeaders HdrLocation rsp of - [] -> return rsp - (Header _ newu:_) -> - case parseURIReference newu of - Nothing -> return rsp - Just newURI -> go n $ -#if defined VERSION_network -#if ! MIN_VERSION_network(2,4,0) -#define WITH_OLD_URI -#endif -#endif -#ifdef WITH_OLD_URI - fromMaybe newURI (newURI `relativeTo` u) -#else - newURI `relativeTo` u -#endif - {- Allows for spaces and other stuff in urls, properly escaping them. -} parseURIRelaxed :: URLString -> Maybe URI parseURIRelaxed = parseURI . escapeURIString isAllowedInURI diff --git a/debian/changelog b/debian/changelog index c55fbabd3..0d884cd81 100644 --- a/debian/changelog +++ b/debian/changelog @@ -39,6 +39,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * git-annex-shell sendkey: Don't fail if a remote asks for a key to be sent that already has a transfer lock file indicating it's being sent to that remote. The remote may have moved between networks, or reconnected. + * Switched from the old haskell HTTP library to http-conduit. -- Joey Hess <joeyh@debian.org> Mon, 21 Jul 2014 14:41:26 -0400 diff --git a/debian/control b/debian/control index 522b7c5cc..1106bc89d 100644 --- a/debian/control +++ b/debian/control @@ -46,6 +46,7 @@ Build-Depends: libghc-dns-dev, libghc-case-insensitive-dev, libghc-http-types-dev, + libghc-http-conduit-dev, libghc-blaze-builder-dev, libghc-crypto-api-dev, libghc-network-multicast-dev, @@ -55,7 +56,6 @@ Build-Depends: libghc-gnutls-dev (>= 0.1.4), libghc-xml-types-dev, libghc-async-dev, - libghc-http-dev, libghc-feed-dev (>= 0.3.9.2), libghc-regex-tdfa-dev [!mipsel !s390], libghc-regex-compat-dev [mipsel s390], diff --git a/git-annex.cabal b/git-annex.cabal index 097fee4cb..58aac39b3 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -96,11 +96,11 @@ Executable git-annex Main-Is: git-annex.hs Build-Depends: MissingH, hslogger, directory, filepath, containers, utf8-string, network (>= 2.0), mtl (>= 2), - bytestring, old-locale, time, HTTP, dataenc, SHA, process, json, + bytestring, old-locale, time, dataenc, SHA, process, json, base (>= 4.5 && < 4.9), monad-control, exceptions (>= 0.5), transformers, IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, process, SafeSemaphore, uuid, random, dlist, unix-compat, async, stm (>= 2.3), - data-default, case-insensitive + data-default, case-insensitive, http-conduit, http-types CC-Options: -Wall GHC-Options: -Wall Extensions: PackageImports @@ -141,8 +141,7 @@ Executable git-annex CPP-Options: -DWITH_S3 if flag(WebDAV) - Build-Depends: DAV (>= 1.0), - http-client, http-types + Build-Depends: DAV (>= 1.0), http-client CPP-Options: -DWITH_WEBDAV if flag(Assistant) && ! os(solaris) @@ -188,7 +187,7 @@ Executable git-annex if flag(Webapp) Build-Depends: yesod, yesod-default, yesod-static, yesod-form, yesod-core, - http-types, wai, wai-extra, warp, warp-tls, + wai, wai-extra, warp, warp-tls, blaze-builder, crypto-api, hamlet, clientsession, template-haskell, data-default, aeson, path-pieces, shakespeare |