diff options
-rw-r--r-- | Remote/S3.hs | 41 | ||||
-rw-r--r-- | debian/changelog | 4 | ||||
-rw-r--r-- | doc/bugs/S3_memory_leaks.mdwn | 2 |
3 files changed, 35 insertions, 12 deletions
diff --git a/Remote/S3.hs b/Remote/S3.hs index 885396f98..e06a3d6c8 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -15,6 +15,7 @@ import qualified Aws.S3 as S3 import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString as S import qualified Data.Map as M import Data.Char import Network.Socket (HostName) @@ -23,6 +24,7 @@ import Network.HTTP.Client (defaultManagerSettings, managerResponseTimeout, resp import Network.HTTP.Types import Control.Monad.Trans.Resource import Control.Monad.Catch +import Data.Conduit import Common.Annex import Types.Remote @@ -36,6 +38,7 @@ import qualified Remote.Helper.AWS as AWS import Creds import Annex.UUID import Logs.Web +import Utility.Metered type BucketName = String @@ -145,14 +148,27 @@ store r h = fileStorer $ \k f p -> do return True +{- Implemented as a fileRetriever, that uses conduit to stream the chunks + - out to the file. Would be better to implement a byteRetriever, but + - that is difficult. -} retrieve :: S3Handle -> Retriever -retrieve _h = error "TODO" - {- - resourcePrepare (const $ s3Action r False) $ \(conn, bucket) -> - byteRetriever $ \k sink -> - liftIO (getObject conn $ bucketKey r bucket k) - >>= either s3Error (sink . obj_data) - -} +retrieve h = fileRetriever $ \f k p -> liftIO $ runResourceT $ do + (fr, fh) <- allocate (openFile f WriteMode) hClose + let req = S3.getObject (hBucket h) (hBucketObject h k) + S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle' h req + responseBody rsp $$+- sinkprogressfile fh p zeroBytesProcessed + release fr + where + sinkprogressfile fh meterupdate sofar = do + mbs <- await + case mbs of + Nothing -> return () + Just bs -> do + let sofar' = sofar -- addBytesProcessed $ S.length bs + liftIO $ do + void $ meterupdate sofar' + S.hPut fh bs + sinkprogressfile fh meterupdate sofar' retrieveCheap :: Key -> FilePath -> Annex Bool retrieveCheap _ _ = return False @@ -289,9 +305,14 @@ sendS3Handle => S3Handle -> req -> Annex res -sendS3Handle h = liftIO . runResourceT . call - where - call = AWS.pureAws (hawscfg h) (hs3cfg h) (hmanager h) +sendS3Handle h r = liftIO $ runResourceT $ sendS3Handle' h r + +sendS3Handle' + :: (AWS.Transaction r a, AWS.ServiceConfiguration r ~ S3.S3Configuration) + => S3Handle + -> r + -> ResourceT IO a +sendS3Handle' h = AWS.pureAws (hawscfg h) (hs3cfg h) (hmanager h) withS3Handle :: RemoteConfig -> UUID -> (S3Handle -> Annex a) -> Annex a withS3Handle c u a = do diff --git a/debian/changelog b/debian/changelog index fb30e7736..48d4d9144 100644 --- a/debian/changelog +++ b/debian/changelog @@ -16,9 +16,9 @@ git-annex (5.20140718) UNRELEASED; urgency=medium were incompletely repaired before. * Fix cost calculation for non-encrypted remotes. * Display exception message when a transfer fails due to an exception. - * WebDAV: Sped up by avoiding making multiple http connections + * WebDAV, S3: Sped up by avoiding making multiple http connections when storing a file. - * WebDAV: Avoid buffering whole file in memory when uploading and + * WebDAV, S3: Avoid buffering whole file in memory when uploading and downloading. * WebDAV: Dropped support for DAV before 1.0. * S3: Switched to using the haskell aws library. diff --git a/doc/bugs/S3_memory_leaks.mdwn b/doc/bugs/S3_memory_leaks.mdwn index 88dd6eaa6..7dc1e5757 100644 --- a/doc/bugs/S3_memory_leaks.mdwn +++ b/doc/bugs/S3_memory_leaks.mdwn @@ -7,6 +7,8 @@ Sending a file to S3 causes a slow memory increase toward the file size. Copying the file back from S3 causes a slow memory increase toward the file size. +> [[fixed|done]] too! --[[Joey]] + The author of hS3 is aware of the problem, and working on it. I think I have identified the root cause of the buffering; it's done by hS3 so it can resend the data if S3 sends it a 307 redirect. --[[Joey]] |