From 0c4c9db9e8860d05eab5b9766d8719ceec1b0b9d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Aug 2017 13:56:12 -0400 Subject: Disable http-client's default 30 second response timeout when HEADing an url to check if it exists. Some web servers take quite a long time to answer a HEAD request. --- CHANGELOG | 3 +++ Remote/S3.hs | 4 +--- Utility/Url.hs | 1 + ...e_via_http_while_wget_fetches_it_just_fine.mdwn | 2 ++ ...ent_1_fa6649208f1882a6bb412ba40cf57fec._comment | 25 ++++++++++++++++++++++ 5 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 doc/bugs/fails_to_verify_presence_via_http_while_wget_fetches_it_just_fine/comment_1_fa6649208f1882a6bb412ba40cf57fec._comment diff --git a/CHANGELOG b/CHANGELOG index 0f4f43f01..96107ad7f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -11,6 +11,9 @@ git-annex (6.20170521) UNRELEASED; urgency=medium directories, by forking a worker process and only deleting the test directory once it exits. * move, copy: Support --batch. + * Disable http-client's default 30 second response timeout when HEADing + an url to check if it exists. Some web servers take quite a long time + to answer a HEAD request. 
-- Joey Hess Sat, 17 Jun 2017 13:02:24 -0400 diff --git a/Remote/S3.hs b/Remote/S3.hs index a341da488..2d2819824 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -441,13 +441,11 @@ withS3HandleMaybe c gc u a = do Just creds -> do awscreds <- liftIO $ genCredentials creds let awscfg = AWS.Configuration AWS.Timestamp awscreds debugMapper - bracketIO (newManager httpcfg) closeManager $ \mgr -> + bracketIO (newManager managerSettings) closeManager $ \mgr -> a $ Just $ S3Handle mgr awscfg s3cfg Nothing -> a Nothing where s3cfg = s3Configuration c - httpcfg = managerSettings - { managerResponseTimeout = responseTimeoutNone } s3Configuration :: RemoteConfig -> S3.S3Configuration AWS.NormalQuery s3Configuration c = cfg diff --git a/Utility/Url.hs b/Utility/Url.hs index 27bccd1b3..d49953c55 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -56,6 +56,7 @@ managerSettings = tlsManagerSettings #else managerSettings = conduitManagerSettings #endif + { managerResponseTimeout = responseTimeoutNone } type URLString = String diff --git a/doc/bugs/fails_to_verify_presence_via_http_while_wget_fetches_it_just_fine.mdwn b/doc/bugs/fails_to_verify_presence_via_http_while_wget_fetches_it_just_fine.mdwn index ccd0e8543..2dc1e641b 100644 --- a/doc/bugs/fails_to_verify_presence_via_http_while_wget_fetches_it_just_fine.mdwn +++ b/doc/bugs/fails_to_verify_presence_via_http_while_wget_fetches_it_just_fine.mdwn @@ -47,3 +47,5 @@ git-annex: drop: 1 failed [[!meta author=yoh]] + +> [[done]] --[[Joey]] diff --git a/doc/bugs/fails_to_verify_presence_via_http_while_wget_fetches_it_just_fine/comment_1_fa6649208f1882a6bb412ba40cf57fec._comment b/doc/bugs/fails_to_verify_presence_via_http_while_wget_fetches_it_just_fine/comment_1_fa6649208f1882a6bb412ba40cf57fec._comment new file mode 100644 index 000000000..ed23e8902 --- /dev/null +++ b/doc/bugs/fails_to_verify_presence_via_http_while_wget_fetches_it_just_fine/comment_1_fa6649208f1882a6bb412ba40cf57fec._comment @@ -0,0 +1,25 @@ +[[!comment 
format=mdwn + username="joey" + subject="""comment 1""" + date="2017-08-15T17:28:20Z" + content=""" +The normal reason for this to happen is if the size of the file +on the website has changed. git-annex checks the reported size and if it +differs from the versioned file, it knows that the website no longer +contains the same file. + +In this case, it seems to be a CGI program generating a zip file, and the +program actually generated two different zip files when I hit it twice with +wget. (So if git-annex actually did drop the only copy of the version you +downloaded, you'd not be able to download it again. Not that git-annex can know +that; this kind of thing is why trusting the web is not a good idea.) They did +have the same size, but it looks like the web server is not sending a size +header anyway. + +The actual problem is that the web server takes a long time to answer a HEAD request +for this URL. It takes 35 seconds before curl is able to HEAD it. I suspect +it's generating the 300 MB zip file before it gets around to finishing +the HEAD request. Not the greatest server behavior, all around. + +That breaks http-client due to its default 30-second timeout. So I will remove that timeout. +"""]] -- cgit v1.2.3