summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-08-02 15:51:58 -0400
committerGravatar Joey Hess <joey@kitenet.net>2014-08-02 15:51:58 -0400
commit81b339034e8871f211ede2cf3bdb7319ad16caed (patch)
tree8fe89688fc4c224958bb822c9dcbd2cf434c8a4b
parent0ddf8152ce1353bbbcd7c87c3f67063b4aed892b (diff)
S3: support chunking
The assistant defaults to 1MiB chunk size for new S3 special remotes. Which will work around a couple of bugs: http://git-annex.branchable.com/bugs/S3_memory_leaks/ http://git-annex.branchable.com/bugs/S3_upload_not_using_multipart/
-rw-r--r--Remote/S3.hs88
-rw-r--r--Types/Key.hs4
-rw-r--r--debian/changelog2
-rw-r--r--doc/special_remotes/S3.mdwn3
-rw-r--r--doc/tips/using_Amazon_S3.mdwn2
5 files changed, 38 insertions, 61 deletions
diff --git a/Remote/S3.hs b/Remote/S3.hs
index c30d07b8a..ed9122cab 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -25,12 +25,10 @@ import qualified Git
import Config
import Config.Cost
import Remote.Helper.Special
-import Remote.Helper.Encryptable
+import Remote.Helper.ChunkedEncryptable
import qualified Remote.Helper.AWS as AWS
-import Crypto
import Creds
import Utility.Metered
-import Annex.Content
import Annex.UUID
import Logs.Web
@@ -47,17 +45,17 @@ remote = RemoteType {
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = new <$> remoteCost gc expensiveRemoteCost
where
- new cst = Just $ encryptableRemote c
- (storeEncrypted this)
- (retrieveEncrypted this)
+ new cst = Just $ chunkedEncryptableRemote c
+ (prepareStore this)
+ (prepareRetrieve this)
this
where
this = Remote {
uuid = u,
cost = cst,
name = Git.repoDescribe r,
- storeKey = store this,
- retrieveKeyFile = retrieve this,
+ storeKey = storeKeyDummy,
+ retrieveKeyFile = retreiveKeyFileDummy,
retrieveKeyFileCheap = retrieveCheap this,
removeKey = remove this c,
hasKey = checkPresent this,
@@ -123,67 +121,39 @@ s3Setup' u c = if isIA c then archiveorg else defaulthost
writeUUIDFile archiveconfig u
use archiveconfig
-store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
-store r k _f p = s3Action r False $ \(conn, bucket) ->
- sendAnnex k (void $ remove' r k) $ \src -> do
- ok <- s3Bool =<< storeHelper (conn, bucket) r k p src
+prepareStore :: Remote -> Preparer Storer
+prepareStore r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) ->
+ fileStorer $ \k src p -> do
+ ok <- s3Bool =<< liftIO (store (conn, bucket) r k p src)
-- Store public URL to item in Internet Archive.
- when (ok && isIA (config r)) $
+ when (ok && isIA (config r) && not (isChunkKey k)) $
setUrlPresent k (iaKeyUrl r k)
return ok
-storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
-storeEncrypted r (cipher, enck) k p = s3Action r False $ \(conn, bucket) ->
- -- To get file size of the encrypted content, have to use a temp file.
- -- (An alternative would be chunking to to a constant size.)
- withTmp enck $ \tmp -> sendAnnex k (void $ remove' r enck) $ \src -> do
- liftIO $ encrypt (getGpgEncParams r) cipher (feedFile src) $
- readBytes $ L.writeFile tmp
- s3Bool =<< storeHelper (conn, bucket) r enck p tmp
-
-storeHelper :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> Annex (AWSResult ())
-storeHelper (conn, bucket) r k p file = do
- size <- maybe getsize (return . fromIntegral) $ keySize k
- meteredBytes (Just p) size $ \meterupdate ->
- liftIO $ withMeteredFile file meterupdate $ \content -> do
- -- size is provided to S3 so the whole content
- -- does not need to be buffered to calculate it
- let object = S3Object
- bucket (bucketFile r k) ""
- (("Content-Length", show size) : getXheaders (config r))
- content
- sendObject conn $
- setStorageClass (getStorageClass $ config r) object
- where
- getsize = liftIO $ fromIntegral . fileSize <$> getFileStatus file
-
-retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
-retrieve r k _f d p = s3Action r False $ \(conn, bucket) ->
- metered (Just p) k $ \meterupdate -> do
- res <- liftIO $ getObject conn $ bucketKey r bucket k
- case res of
- Right o -> do
- liftIO $ meteredWriteFile meterupdate d $
- obj_data o
- return True
- Left e -> s3Warning e
+store :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> IO (AWSResult ())
+store (conn, bucket) r k p file = do
+ size <- (fromIntegral . fileSize <$> getFileStatus file) :: IO Integer
+ withMeteredFile file p $ \content -> do
+ -- size is provided to S3 so the whole content
+ -- does not need to be buffered to calculate it
+ let object = S3Object
+ bucket (bucketFile r k) ""
+ (("Content-Length", show size) : getXheaders (config r))
+ content
+ sendObject conn $
+ setStorageClass (getStorageClass $ config r) object
+
+prepareRetrieve :: Remote -> Preparer Retriever
+prepareRetrieve r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) ->
+ byteRetriever $ \k ->
+ liftIO (getObject conn $ bucketKey r bucket k)
+ >>= either s3Error (return . obj_data)
retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False
-retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool
-retrieveEncrypted r (cipher, enck) k d p = s3Action r False $ \(conn, bucket) ->
- metered (Just p) k $ \meterupdate -> do
- res <- liftIO $ getObject conn $ bucketKey r bucket enck
- case res of
- Right o -> liftIO $ decrypt cipher (\h -> meteredWrite meterupdate h $ obj_data o) $
- readBytes $ \content -> do
- L.writeFile d content
- return True
- Left e -> s3Warning e
-
{- Internet Archive doesn't easily allow removing content.
- While it may remove the file, there are generally other files
- derived from it that it does not remove. -}
diff --git a/Types/Key.hs b/Types/Key.hs
index 154e813ff..5bb41e15f 100644
--- a/Types/Key.hs
+++ b/Types/Key.hs
@@ -15,6 +15,7 @@ module Types.Key (
file2key,
nonChunkKey,
chunkKeyOffset,
+ isChunkKey,
prop_idempotent_key_encode,
prop_idempotent_key_decode
@@ -62,6 +63,9 @@ chunkKeyOffset k = (*)
<$> keyChunkSize k
<*> (pred <$> keyChunkNum k)
+isChunkKey :: Key -> Bool
+isChunkKey k = isJust (keyChunkSize k) && isJust (keyChunkNum k)
+
fieldSep :: Char
fieldSep = '-'
diff --git a/debian/changelog b/debian/changelog
index f8b700ae7..d8c23af9b 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,7 +1,7 @@
git-annex (5.20140718) UNRELEASED; urgency=medium
* New chunk= option to chunk files stored in special remotes.
- Currently supported by: directory, and all external special remotes.
+ Currently supported by: directory, S3, and all external special remotes.
* Partially transferred files are automatically resumed when using
chunked remotes!
* The old chunksize= option is deprecated. Do not use for new remotes.
diff --git a/doc/special_remotes/S3.mdwn b/doc/special_remotes/S3.mdwn
index 5291a4eb6..fe46948b3 100644
--- a/doc/special_remotes/S3.mdwn
+++ b/doc/special_remotes/S3.mdwn
@@ -18,6 +18,9 @@ the S3 remote.
* `encryption` - One of "none", "hybrid", "shared", or "pubkey".
See [[encryption]].
+* `chunk` - Enables [[chunking]] when storing large files.
+ `chunk=1MiB` is a good starting point for chunking.
+
* `keyid` - Specifies the gpg key to use for [[encryption]].
* `embedcreds` - Optional. Set to "yes" embed the login credentials inside
diff --git a/doc/tips/using_Amazon_S3.mdwn b/doc/tips/using_Amazon_S3.mdwn
index 0c68c7387..ede3f952f 100644
--- a/doc/tips/using_Amazon_S3.mdwn
+++ b/doc/tips/using_Amazon_S3.mdwn
@@ -14,7 +14,7 @@ like "2512E3C7"
Next, create the S3 remote, and describe it.
- # git annex initremote cloud type=S3 keyid=2512E3C7
+ # git annex initremote cloud type=S3 chunk=1MiB keyid=2512E3C7
initremote cloud (encryption setup with gpg key C910D9222512E3C7) (checking bucket) (creating bucket in US) (gpg) ok
# git annex describe cloud "at Amazon's US datacenter"
describe cloud ok