aboutsummaryrefslogtreecommitdiff
path: root/Remote
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2017-09-08 15:41:31 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2017-09-08 15:46:24 -0400
commit5ef1c9b5690057e5b18dc7dcc3627776b400c544 (patch)
treef71d9ad13509977736bd55698cb4ccc18311e091 /Remote
parent23f55c0efdd58f8024d9b0c9e4b02db7b8d27b61 (diff)
S3 export (untested)
It opens a http connection per file exported, but then so does git annex copy --to s3. Decided not to munge exported filenames for IA. Too large a chance of the munging having confusing results. Instead, export of files not supported by IA, eg with spaces in their name, will fail. This commit was supported by the NSF-funded DataLad project.
Diffstat (limited to 'Remote')
-rw-r--r--Remote/S3.hs130
1 files changed, 97 insertions, 33 deletions
diff --git a/Remote/S3.hs b/Remote/S3.hs
index 4b56cce29..96d24d00e 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -59,7 +59,7 @@ remote = RemoteType
, enumerate = const (findSpecialRemotes "s3")
, generate = gen
, setup = s3Setup
- , exportSupported = exportUnsupported
+ , exportSupported = exportIsSupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
@@ -86,7 +86,13 @@ gen r u c gc = do
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = False
- , exportActions = exportUnsupported
+ , exportActions = ExportActions
+ { storeExport = storeExportS3 this info
+ , retrieveExport = retrieveExportS3 this info
+ , removeExport = removeExportS3 this info
+ , checkPresentExport = checkPresentExportS3 this info
+ , renameExport = renameExportS3 this info
+ }
, whereisKey = Just (getWebUrls info)
, remoteFsck = Nothing
, repairRepo = Nothing
@@ -107,6 +113,7 @@ s3Setup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteG
s3Setup ss mu mcreds c gc = do
u <- maybe (liftIO genUUID) return mu
s3Setup' ss u mcreds c gc
+
s3Setup' :: SetupStage -> UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
s3Setup' ss u mcreds c gc
| configIA c = archiveorg
@@ -170,25 +177,26 @@ prepareS3HandleMaybe r = resourcePrepare $ const $
store :: Remote -> S3Info -> S3Handle -> Storer
store _r info h = fileStorer $ \k f p -> do
- case partSize info of
- Just partsz | partsz > 0 -> do
- fsz <- liftIO $ getFileSize f
- if fsz > partsz
- then multipartupload fsz partsz k f p
- else singlepartupload k f p
- _ -> singlepartupload k f p
+ storeHelper info h f (T.pack $ bucketObject info k) p
-- Store public URL to item in Internet Archive.
when (isIA info && not (isChunkKey k)) $
setUrlPresent webUUID k (iaPublicKeyUrl info k)
return True
+
+storeHelper :: S3Info -> S3Handle -> FilePath -> S3.Object -> MeterUpdate -> Annex ()
+storeHelper info h f object p = case partSize info of
+ Just partsz | partsz > 0 -> do
+ fsz <- liftIO $ getFileSize f
+ if fsz > partsz
+ then multipartupload fsz partsz
+ else singlepartupload
+ _ -> singlepartupload
where
- singlepartupload k f p = do
+ singlepartupload = do
rbody <- liftIO $ httpBodyStorer f p
- void $ sendS3Handle h $ putObject info (T.pack $ bucketObject info k) rbody
- multipartupload fsz partsz k f p = do
+ void $ sendS3Handle h $ putObject info object rbody
+ multipartupload fsz partsz = do
#if MIN_VERSION_aws(0,10,6)
- let object = T.pack (bucketObject info k)
-
let startreq = (S3.postInitiateMultipartUpload (bucket info) object)
{ S3.imuStorageClass = Just (storageClass info)
, S3.imuMetadata = metaHeaders info
@@ -227,16 +235,27 @@ store _r info h = fileStorer $ \k f p -> do
(bucket info) object uploadid (zip [1..] etags)
#else
warning $ "Cannot do multipart upload (partsize " ++ show partsz ++ ") of large file (" ++ show fsz ++ "); built with too old a version of the aws library."
- singlepartupload k f p
+ singlepartupload
#endif
{- Implemented as a fileRetriever, that uses conduit to stream the chunks
- out to the file. Would be better to implement a byteRetriever, but
- that is difficult. -}
retrieve :: Remote -> S3Info -> Maybe S3Handle -> Retriever
-retrieve _ info (Just h) = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
+retrieve _ info (Just h) = fileRetriever $ \f k p ->
+ retrieveHelper info h (T.pack $ bucketObject info k) f p
+retrieve r info Nothing = case getpublicurl info of
+ Nothing -> \_ _ _ -> do
+ warnMissingCredPairFor "S3" (AWS.creds $ uuid r)
+ return False
+ Just geturl -> fileRetriever $ \f k p ->
+ unlessM (downloadUrl k p [geturl k] f) $
+ giveup "failed to download content"
+
+retrieveHelper :: S3Info -> S3Handle -> S3.Object -> FilePath -> MeterUpdate -> Annex ()
+retrieveHelper info h object f p = liftIO $ runResourceT $ do
(fr, fh) <- allocate (openFile f WriteMode) hClose
- let req = S3.getObject (bucket info) (T.pack $ bucketObject info k)
+ let req = S3.getObject (bucket info) object
S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle' h req
responseBody rsp $$+- sinkprogressfile fh p zeroBytesProcessed
release fr
@@ -251,13 +270,6 @@ retrieve _ info (Just h) = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
void $ meterupdate sofar'
S.hPut fh bs
sinkprogressfile fh meterupdate sofar'
-retrieve r info Nothing = case getpublicurl info of
- Nothing -> \_ _ _ -> do
- warnMissingCredPairFor "S3" (AWS.creds $ uuid r)
- return False
- Just geturl -> fileRetriever $ \f k p ->
- unlessM (downloadUrl k p [geturl k] f) $
- giveup "failed to download content"
retrieveCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False
@@ -276,8 +288,19 @@ remove info h k
return $ either (const False) (const True) res
checkKey :: Remote -> S3Info -> Maybe S3Handle -> CheckPresent
+checkKey r info Nothing k = case getpublicurl info of
+ Nothing -> do
+ warnMissingCredPairFor "S3" (AWS.creds $ uuid r)
+ giveup "No S3 credentials configured"
+ Just geturl -> do
+ showChecking r
+ withUrlOptions $ checkBoth (geturl k) (keySize k)
checkKey r info (Just h) k = do
showChecking r
+ checkKeyHelper info h (T.pack $ bucketObject info k)
+
+checkKeyHelper :: S3Info -> S3Handle -> S3.Object -> Annex Bool
+checkKeyHelper info h object = do
#if MIN_VERSION_aws(0,10,0)
rsp <- go
return (isJust $ S3.horMetadata rsp)
@@ -287,8 +310,7 @@ checkKey r info (Just h) k = do
return True
#endif
where
- go = sendS3Handle h $
- S3.headObject (bucket info) (T.pack $ bucketObject info k)
+ go = sendS3Handle h $ S3.headObject (bucket info) object
#if ! MIN_VERSION_aws(0,10,0)
{- Catch exception headObject returns when an object is not present
@@ -303,13 +325,50 @@ checkKey r info (Just h) k = do
| otherwise = Nothing
#endif
-checkKey r info Nothing k = case getpublicurl info of
- Nothing -> do
- warnMissingCredPairFor "S3" (AWS.creds $ uuid r)
- giveup "No S3 credentials configured"
- Just geturl -> do
- showChecking r
- withUrlOptions $ checkBoth (geturl k) (keySize k)
+storeExportS3 :: Remote -> S3Info -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool
+storeExportS3 r info f _k loc p =
+ catchNonAsync go (\e -> warning (show e) >> return False)
+ where
+ go = withS3Handle (config r) (gitconfig r) (uuid r) $ \h -> do
+ storeHelper info h f (T.pack $ bucketExportLocation info loc) p
+ return True
+
+retrieveExportS3 :: Remote -> S3Info -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Bool
+retrieveExportS3 r info _k loc f p =
+ catchNonAsync go (\e -> warning (show e) >> return False)
+ where
+ go = withS3Handle (config r) (gitconfig r) (uuid r) $ \h -> do
+ retrieveHelper info h (T.pack $ bucketExportLocation info loc) f p
+ return True
+
+removeExportS3 :: Remote -> S3Info -> Key -> ExportLocation -> Annex Bool
+removeExportS3 r info _k loc =
+ catchNonAsync go (\e -> warning (show e) >> return False)
+ where
+ go = withS3Handle (config r) (gitconfig r) (uuid r) $ \h -> do
+ res <- tryNonAsync $ sendS3Handle h $
+ S3.DeleteObject (T.pack $ bucketExportLocation info loc) (bucket info)
+ return $ either (const False) (const True) res
+
+checkPresentExportS3 :: Remote -> S3Info -> Key -> ExportLocation -> Annex Bool
+checkPresentExportS3 r info _k loc =
+ catchNonAsync go (\e -> warning (show e) >> return False)
+ where
+ go = withS3Handle (config r) (gitconfig r) (uuid r) $ \h -> do
+ checkKeyHelper info h (T.pack $ bucketExportLocation info loc)
+
+renameExportS3 :: Remote -> S3Info -> Key -> ExportLocation -> ExportLocation -> Annex Bool
+renameExportS3 r info _k src dest = catchNonAsync go (\e -> warning (show e) >> return False)
+ where
+ go = withS3Handle (config r) (gitconfig r) (uuid r) $ \h -> do
+ -- S3 has no move primitive; copy and delete.
+ void $ sendS3Handle h $ S3.copyObject (bucket info) dstobject
+ (S3.ObjectId (bucket info) srcobject Nothing)
+ S3.CopyMetadata
+ void $ sendS3Handle h $ S3.DeleteObject srcobject (bucket info)
+ return True
+ srcobject = T.pack $ bucketExportLocation info src
+ dstobject = T.pack $ bucketExportLocation info dest
{- Generate the bucket if it does not already exist, including creating the
- UUID file within the bucket.
@@ -474,6 +533,7 @@ data S3Info = S3Info
{ bucket :: S3.Bucket
, storageClass :: S3.StorageClass
, bucketObject :: Key -> String
+ , bucketExportLocation :: ExportLocation -> String
, metaHeaders :: [(T.Text, T.Text)]
, partSize :: Maybe Integer
, isIA :: Bool
@@ -491,6 +551,7 @@ extractS3Info c = do
{ bucket = b
, storageClass = getStorageClass c
, bucketObject = getBucketObject c
+ , bucketExportLocation = getBucketExportLocation c
, metaHeaders = getMetaHeaders c
, partSize = getPartSize c
, isIA = configIA c
@@ -554,6 +615,9 @@ getBucketObject c = munge . key2file
Just "ia" -> iaMunge $ getFilePrefix c ++ s
_ -> getFilePrefix c ++ s
+getBucketExportLocation :: RemoteConfig -> ExportLocation -> FilePath
+getBucketExportLocation c (ExportLocation loc) = getFilePrefix c ++ loc
+
{- Internet Archive limits filenames to a subset of ascii,
- with no whitespace. Other characters are xml entity
- encoded. -}