From 81b339034e8871f211ede2cf3bdb7319ad16caed Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Sat, 2 Aug 2014 15:51:58 -0400
Subject: S3: support chunking

The assistant defaults to a 1MiB chunk size for new S3 special remotes,
which will work around a couple of bugs:

http://git-annex.branchable.com/bugs/S3_memory_leaks/
http://git-annex.branchable.com/bugs/S3_upload_not_using_multipart/
---
 Remote/S3.hs | 88 ++++++++++++++++++++----------------------------------------
 1 file changed, 29 insertions(+), 59 deletions(-)

(limited to 'Remote')

diff --git a/Remote/S3.hs b/Remote/S3.hs
index c30d07b8a..ed9122cab 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -25,12 +25,10 @@ import qualified Git
 import Config
 import Config.Cost
 import Remote.Helper.Special
-import Remote.Helper.Encryptable
+import Remote.Helper.ChunkedEncryptable
 import qualified Remote.Helper.AWS as AWS
-import Crypto
 import Creds
 import Utility.Metered
-import Annex.Content
 import Annex.UUID
 import Logs.Web
 
@@ -47,17 +45,17 @@ remote = RemoteType {
 gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
 gen r u c gc = new <$> remoteCost gc expensiveRemoteCost
   where
-	new cst = Just $ encryptableRemote c
-		(storeEncrypted this)
-		(retrieveEncrypted this)
+	new cst = Just $ chunkedEncryptableRemote c
+		(prepareStore this)
+		(prepareRetrieve this)
 		this
 	  where
 		this = Remote {
 			uuid = u,
 			cost = cst,
 			name = Git.repoDescribe r,
-			storeKey = store this,
-			retrieveKeyFile = retrieve this,
+			storeKey = storeKeyDummy,
+			retrieveKeyFile = retreiveKeyFileDummy,
 			retrieveKeyFileCheap = retrieveCheap this,
 			removeKey = remove this c,
 			hasKey = checkPresent this,
@@ -123,67 +121,39 @@ s3Setup' u c = if isIA c then archiveorg else defaulthost
 		writeUUIDFile archiveconfig u
 		use archiveconfig
 
-store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
-store r k _f p = s3Action r False $ \(conn, bucket) ->
-	sendAnnex k (void $ remove' r k) $ \src -> do
-		ok <- s3Bool =<< storeHelper (conn, bucket) r k p src
+prepareStore :: Remote -> Preparer Storer
+prepareStore r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) ->
+	fileStorer $ \k src p -> do
+		ok <- s3Bool =<< liftIO (store (conn, bucket) r k p src)
 
 		-- Store public URL to item in Internet Archive.
-		when (ok && isIA (config r)) $
+		when (ok && isIA (config r) && not (isChunkKey k)) $
 			setUrlPresent k (iaKeyUrl r k)
 
 		return ok
 
-storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
-storeEncrypted r (cipher, enck) k p = s3Action r False $ \(conn, bucket) ->
-	-- To get file size of the encrypted content, have to use a temp file.
-	-- (An alternative would be chunking to to a constant size.)
-	withTmp enck $ \tmp -> sendAnnex k (void $ remove' r enck) $ \src -> do
-		liftIO $ encrypt (getGpgEncParams r) cipher (feedFile src) $
-			readBytes $ L.writeFile tmp
-		s3Bool =<< storeHelper (conn, bucket) r enck p tmp
-
-storeHelper :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> Annex (AWSResult ())
-storeHelper (conn, bucket) r k p file = do
-	size <- maybe getsize (return . fromIntegral) $ keySize k
-	meteredBytes (Just p) size $ \meterupdate ->
-		liftIO $ withMeteredFile file meterupdate $ \content -> do
-			-- size is provided to S3 so the whole content
-			-- does not need to be buffered to calculate it
-			let object = S3Object
-				bucket (bucketFile r k) ""
-				(("Content-Length", show size) : getXheaders (config r))
-				content
-			sendObject conn $
-				setStorageClass (getStorageClass $ config r) object
-  where
-	getsize = liftIO $ fromIntegral . fileSize <$> getFileStatus file
-
-retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
-retrieve r k _f d p = s3Action r False $ \(conn, bucket) ->
-	metered (Just p) k $ \meterupdate -> do
-		res <- liftIO $ getObject conn $ bucketKey r bucket k
-		case res of
-			Right o -> do
-				liftIO $ meteredWriteFile meterupdate d $
-					obj_data o
-				return True
-			Left e -> s3Warning e
+store :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> IO (AWSResult ())
+store (conn, bucket) r k p file = do
+	size <- (fromIntegral . fileSize <$> getFileStatus file) :: IO Integer
+	withMeteredFile file p $ \content -> do
+		-- size is provided to S3 so the whole content
+		-- does not need to be buffered to calculate it
+		let object = S3Object
+			bucket (bucketFile r k) ""
+			(("Content-Length", show size) : getXheaders (config r))
+			content
+		sendObject conn $
+			setStorageClass (getStorageClass $ config r) object
+
+prepareRetrieve :: Remote -> Preparer Retriever
+prepareRetrieve r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) ->
+	byteRetriever $ \k ->
+		liftIO (getObject conn $ bucketKey r bucket k)
+			>>= either s3Error (return . obj_data)
 
 retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool
 retrieveCheap _ _ _ = return False
 
-retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool
-retrieveEncrypted r (cipher, enck) k d p = s3Action r False $ \(conn, bucket) ->
-	metered (Just p) k $ \meterupdate -> do
-		res <- liftIO $ getObject conn $ bucketKey r bucket enck
-		case res of
-			Right o -> liftIO $ decrypt cipher (\h -> meteredWrite meterupdate h $ obj_data o) $
-				readBytes $ \content -> do
-					L.writeFile d content
-					return True
-			Left e -> s3Warning e
-
 {- Internet Archive doesn't easily allow removing content.
 - While it may remove the file, there are generally other files
 - derived from it that it does not remove. -}
-- 
cgit v1.2.3
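
For readers following the conversion: Remote.Helper.ChunkedEncryptable centralizes
chunking and encryption, so a remote now only supplies a Preparer Storer and a
Preparer Retriever, and wires storeKey/retrieveKeyFile to the dummy placeholders
that chunkedEncryptableRemote replaces. The following is a minimal sketch of a
remote written against that interface, not git-annex code: myUpload and myDownload
are hypothetical backend actions, and it assumes simplyPrepare from the same helper
module, which suits a backend with no per-session resource to set up. It also
assumes the imports already present in Remote/S3.hs, including
qualified Data.ByteString.Lazy as L.

prepareMyStore :: Remote -> Preparer Storer
prepareMyStore r = simplyPrepare $ fileStorer $ \k src p ->
	-- fileStorer hands each (possibly chunked) piece of content
	-- over as a file on disk, with its Key and a MeterUpdate for
	-- progress reporting; the action returns True on success.
	liftIO $ myUpload r k src p

prepareMyRetrieve :: Remote -> Preparer Retriever
prepareMyRetrieve r = simplyPrepare $ byteRetriever $ \k ->
	-- byteRetriever only has to produce the content as a lazy
	-- ByteString; the helper layer writes it out, reassembling
	-- chunks and decrypting as configured.
	liftIO $ myDownload r k

-- Hypothetical backend actions, signatures only for illustration.
myUpload :: Remote -> Key -> FilePath -> MeterUpdate -> IO Bool
myUpload = undefined

myDownload :: Remote -> Key -> IO L.ByteString
myDownload = undefined

The S3 code above uses resourcePrepare instead of simplyPrepare because the AWS
connection is a resource that should be opened once and then shared by all of the
per-chunk store and retrieve operations.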
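On the user-facing side, the 1MiB default mentioned in the commit message applies
to S3 special remotes created by the assistant. From the command line, chunking is
opted into per remote with the chunk= parameter at initremote time, along the lines
of (the remote name and encryption setting here are illustrative):

	git annex initremote mys3 type=S3 chunk=1MiB encryption=shared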