-rw-r--r--  Remote/S3.hs                    41
-rw-r--r--  debian/changelog                 4
-rw-r--r--  doc/bugs/S3_memory_leaks.mdwn    2
3 files changed, 35 insertions(+), 12 deletions(-)
diff --git a/Remote/S3.hs b/Remote/S3.hs
index 885396f98..e06a3d6c8 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -15,6 +15,7 @@ import qualified Aws.S3 as S3
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.ByteString.Lazy as L
+import qualified Data.ByteString as S
import qualified Data.Map as M
import Data.Char
import Network.Socket (HostName)
@@ -23,6 +24,7 @@ import Network.HTTP.Client (defaultManagerSettings, managerResponseTimeout, resp
import Network.HTTP.Types
import Control.Monad.Trans.Resource
import Control.Monad.Catch
+import Data.Conduit
import Common.Annex
import Types.Remote
@@ -36,6 +38,7 @@ import qualified Remote.Helper.AWS as AWS
import Creds
import Annex.UUID
import Logs.Web
+import Utility.Metered
type BucketName = String
@@ -145,14 +148,27 @@ store r h = fileStorer $ \k f p -> do
return True
+{- Implemented as a fileRetriever that uses conduit to stream the chunks
+ - out to the file. It would be better to implement a byteRetriever, but
+ - that is difficult. -}
retrieve :: S3Handle -> Retriever
-retrieve _h = error "TODO"
- {-
- resourcePrepare (const $ s3Action r False) $ \(conn, bucket) ->
- byteRetriever $ \k sink ->
- liftIO (getObject conn $ bucketKey r bucket k)
- >>= either s3Error (sink . obj_data)
- -}
+retrieve h = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
+	(fr, fh) <- allocate (openFile f WriteMode) hClose
+	let req = S3.getObject (hBucket h) (hBucketObject h k)
+	S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle' h req
+	responseBody rsp $$+- sinkprogressfile fh p zeroBytesProcessed
+	release fr
+  where
+	sinkprogressfile fh meterupdate sofar = do
+		mbs <- await
+		case mbs of
+			Nothing -> return ()
+			Just bs -> do
+				let sofar' = addBytesProcessed sofar (S.length bs)
+				liftIO $ do
+					void $ meterupdate sofar'
+					S.hPut fh bs
+				sinkprogressfile fh meterupdate sofar'
retrieveCheap :: Key -> FilePath -> Annex Bool
retrieveCheap _ _ = return False
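
Aside: the sinkprogressfile loop above is an instance of a general conduit
pattern, a sink that consumes chunks while tracking a running total. A
minimal standalone sketch, assuming only the conduit and bytestring
packages; sinkToHandle and the report callback are illustrative names, not
git-annex API:

	import qualified Data.ByteString as S
	import Data.Conduit
	import Control.Monad.IO.Class (liftIO)
	import System.IO (Handle)

	-- Stream each ByteString chunk to the handle, reporting the
	-- running byte total before each write.
	sinkToHandle :: Handle -> (Int -> IO ()) -> Sink S.ByteString IO ()
	sinkToHandle fh report = go 0
	  where
		go sofar = do
			mbs <- await
			case mbs of
				Nothing -> return ()
				Just bs -> do
					let sofar' = sofar + S.length bs
					liftIO $ do
						report sofar'
						S.hPut fh bs
					go sofar'

Connected to an http-conduit response body of this era, the call would look
like: responseBody rsp $$+- sinkToHandle fh report.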
@@ -289,9 +305,14 @@ sendS3Handle
=> S3Handle
-> req
-> Annex res
-sendS3Handle h = liftIO . runResourceT . call
- where
- call = AWS.pureAws (hawscfg h) (hs3cfg h) (hmanager h)
+sendS3Handle h r = liftIO $ runResourceT $ sendS3Handle' h r
+
+sendS3Handle'
+ :: (AWS.Transaction r a, AWS.ServiceConfiguration r ~ S3.S3Configuration)
+ => S3Handle
+ -> r
+ -> ResourceT IO a
+sendS3Handle' h = AWS.pureAws (hawscfg h) (hs3cfg h) (hmanager h)
withS3Handle :: RemoteConfig -> UUID -> (S3Handle -> Annex a) -> Annex a
withS3Handle c u a = do
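
Note: the sendS3Handle / sendS3Handle' split matters because the streaming
response body returned by S3.getObject is only readable inside the
ResourceT scope that produced it; sendS3Handle closes its own scope before
returning, so retrieve calls sendS3Handle' from within its own
runResourceT. A hedged sketch of the shape this enables, in the context of
Remote/S3.hs; fetchObject is an illustrative name, not git-annex API, and
Data.Conduit.Binary from conduit-extra is an assumed extra import:

	import qualified Data.Conduit.Binary as CB

	-- Send the request and drain the streaming body to a file before
	-- the ResourceT scope closes and the connection is released.
	fetchObject :: S3Handle -> S3.Bucket -> T.Text -> FilePath -> IO ()
	fetchObject h bucket object dest = runResourceT $ do
		let req = S3.getObject bucket object
		S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle' h req
		responseBody rsp $$+- CB.sinkFile dest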
diff --git a/debian/changelog b/debian/changelog
index fb30e7736..48d4d9144 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -16,9 +16,9 @@ git-annex (5.20140718) UNRELEASED; urgency=medium
were incompletely repaired before.
* Fix cost calculation for non-encrypted remotes.
* Display exception message when a transfer fails due to an exception.
- * WebDAV: Sped up by avoiding making multiple http connections
+ * WebDAV, S3: Sped up by avoiding making multiple http connections
when storing a file.
- * WebDAV: Avoid buffering whole file in memory when uploading and
+ * WebDAV, S3: Avoid buffering whole file in memory when uploading and
downloading.
* WebDAV: Dropped support for DAV before 1.0.
* S3: Switched to using the haskell aws library.
diff --git a/doc/bugs/S3_memory_leaks.mdwn b/doc/bugs/S3_memory_leaks.mdwn
index 88dd6eaa6..7dc1e5757 100644
--- a/doc/bugs/S3_memory_leaks.mdwn
+++ b/doc/bugs/S3_memory_leaks.mdwn
@@ -7,6 +7,8 @@ Sending a file to S3 causes a slow memory increase toward the file size.
Copying the file back from S3 causes a slow memory increase toward the
file size.
+> [[fixed|done]] too! --[[Joey]]
+
The author of hS3 is aware of the problem, and working on it. I think I
have identified the root cause of the buffering; it's done by hS3 so it can
resend the data if S3 sends it a 307 redirect. --[[Joey]]