 Creds.hs                                    | 32
 Remote/S3.hs                                | 97
 debian/changelog                            |  1
 doc/special_remotes/S3.mdwn                 |  8
 doc/tips/using_Amazon_S3.mdwn               | 15
 doc/todo/credentials-less_access_to_s3.mdwn |  2
 doc/todo/credentials-less_access_to_s3/comment_3_26de94e8e3fefc9b47d1510bfb2dac9b._comment | 10
 doc/todo/credentials-less_access_to_s3/comment_3_96c8bcb545578280dc02c00d82978f77._comment | 11
 8 files changed, 115 insertions(+), 61 deletions(-)
diff --git a/Creds.hs b/Creds.hs
index a4074c5c1..68bd1940c 100644
--- a/Creds.hs
+++ b/Creds.hs
@@ -9,8 +9,9 @@ module Creds (
module Types.Creds,
CredPairStorage(..),
setRemoteCredPair,
- getRemoteCredPairFor,
getRemoteCredPair,
+ getRemoteCredPairFor,
+ warnMissingCredPairFor,
getEnvCredPair,
writeCacheCreds,
readCacheCreds,
@@ -74,18 +75,6 @@ setRemoteCredPair _ c storage (Just creds)
{- Gets a remote's credpair, from the environment if set, otherwise
- from the cache in gitAnnexCredsDir, or failing that, from the
- value in RemoteConfig. -}
-getRemoteCredPairFor :: String -> RemoteConfig -> CredPairStorage -> Annex (Maybe CredPair)
-getRemoteCredPairFor this c storage = maybe missing (return . Just) =<< getRemoteCredPair c storage
- where
- (loginvar, passwordvar) = credPairEnvironment storage
- missing = do
- warning $ unwords
- [ "Set both", loginvar
- , "and", passwordvar
- , "to use", this
- ]
- return Nothing
-
getRemoteCredPair :: RemoteConfig -> CredPairStorage -> Annex (Maybe CredPair)
getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv
where
@@ -122,6 +111,23 @@ getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv
return $ Just credpair
_ -> error "bad creds"
+getRemoteCredPairFor :: String -> RemoteConfig -> CredPairStorage -> Annex (Maybe CredPair)
+getRemoteCredPairFor this c storage = go =<< getRemoteCredPair c storage
+ where
+ go Nothing = do
+ warnMissingCredPairFor this storage
+ return Nothing
+ go (Just credpair) = return $ Just credpair
+
+warnMissingCredPairFor :: String -> CredPairStorage -> Annex ()
+warnMissingCredPairFor this storage = warning $ unwords
+ [ "Set both", loginvar
+ , "and", passwordvar
+ , "to use", this
+ ]
+ where
+ (loginvar, passwordvar) = credPairEnvironment storage
+
{- Gets a CredPair from the environment. -}
getEnvCredPair :: CredPairStorage -> IO (Maybe CredPair)
getEnvCredPair storage = liftM2 (,)
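For the S3 remote, the credential pair's environment variables are the standard AWS ones (the same variables exported in the tips page changes further down this patch), so the warning produced by `warnMissingCredPairFor "S3"` reads approximately:

    Set both AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY to use S3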
diff --git a/Remote/S3.hs b/Remote/S3.hs
index bcaf9d022..7b0cf5b23 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -45,6 +45,9 @@ import Annex.UUID
import Logs.Web
import Utility.Metered
import Utility.DataUnits
+import Annex.Content
+import Annex.Url (withUrlOptions)
+import Utility.Url (checkBoth)
type BucketName = String
@@ -64,7 +67,7 @@ gen r u c gc = do
where
new cst info = Just $ specialRemote c
(prepareS3Handle this $ store this info)
- (prepareS3HandleMaybe this $ retrieve info)
+ (prepareS3HandleMaybe this $ retrieve this info)
(prepareS3Handle this $ remove info)
(prepareS3HandleMaybe this $ checkKey this info)
this
@@ -90,7 +93,7 @@ gen r u c gc = do
, availability = GloballyAvailable
, remotetype = remote
, mkUnavailable = gen r u (M.insert "host" "!dne!" c) gc
- , getInfo = includeCredsInfo c (AWS.creds u) (s3Info c)
+ , getInfo = includeCredsInfo c (AWS.creds u) (s3Info c info)
, claimUrl = Nothing
, checkUrl = Nothing
}
@@ -158,7 +161,7 @@ prepareS3HandleMaybe r = resourcePrepare $ const $
withS3HandleMaybe (config r) (uuid r)
store :: Remote -> S3Info -> S3Handle -> Storer
-store r info h = fileStorer $ \k f p -> do
+store _r info h = fileStorer $ \k f p -> do
case partSize info of
Just partsz | partsz > 0 -> do
fsz <- liftIO $ getFileSize f
@@ -168,15 +171,15 @@ store r info h = fileStorer $ \k f p -> do
_ -> singlepartupload k f p
-- Store public URL to item in Internet Archive.
when (isIA info && not (isChunkKey k)) $
- setUrlPresent webUUID k (iaKeyUrl r k)
+ setUrlPresent webUUID k (iaPublicKeyUrl info k)
return True
where
singlepartupload k f p = do
rbody <- liftIO $ httpBodyStorer f p
- void $ sendS3Handle h $ putObject info (bucketObject info k) rbody
+ void $ sendS3Handle h $ putObject info (T.pack $ bucketObject info k) rbody
multipartupload fsz partsz k f p = do
#if MIN_VERSION_aws(0,10,6)
- let object = bucketObject info k
+ let object = T.pack (bucketObject info k)
let startreq = (S3.postInitiateMultipartUpload (bucket info) object)
{ S3.imuStorageClass = Just (storageClass info)
@@ -222,10 +225,10 @@ store r info h = fileStorer $ \k f p -> do
{- Implemented as a fileRetriever, that uses conduit to stream the chunks
- out to the file. Would be better to implement a byteRetriever, but
- that is difficult. -}
-retrieve :: S3Info -> Maybe S3Handle -> Retriever
-retrieve info (Just h) = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
+retrieve :: Remote -> S3Info -> Maybe S3Handle -> Retriever
+retrieve _ info (Just h) = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
(fr, fh) <- allocate (openFile f WriteMode) hClose
- let req = S3.getObject (bucket info) (bucketObject info k)
+ let req = S3.getObject (bucket info) (T.pack $ bucketObject info k)
S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle' h req
responseBody rsp $$+- sinkprogressfile fh p zeroBytesProcessed
release fr
@@ -240,7 +243,13 @@ retrieve info (Just h) = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
void $ meterupdate sofar'
S.hPut fh bs
sinkprogressfile fh meterupdate sofar'
-retrieve _info Nothing = error "TODO"
+retrieve r info Nothing = case getpublicurl info of
+ Nothing -> \_ _ _ -> do
+ warnMissingCredPairFor "S3" (AWS.creds $ uuid r)
+ return False
+ Just geturl -> fileRetriever $ \f k _p ->
+ unlessM (downloadUrl [geturl k] f) $
+ error "failed to download content"
retrieveCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False
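Read together, the no-handle branch above means a public=yes remote degrades to a plain HTTP(S) download. A condensed sketch of that path, assuming the `S3Info` accessors and the `downloadUrl` import from this patch:

```haskell
-- Sketch only: credential-less retrieval via the remote's public URL.
-- Returns False when no public URL generator is configured.
fetchPublic :: S3Info -> Key -> FilePath -> Annex Bool
fetchPublic info k f = case getpublicurl info of
	Just geturl -> downloadUrl [geturl k] f  -- plain HTTP(S) fetch
	Nothing     -> return False              -- creds are required instead
```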
@@ -255,7 +264,7 @@ remove info h k
return False
| otherwise = do
res <- tryNonAsync $ sendS3Handle h $
- S3.DeleteObject (bucketObject info k) (bucket info)
+ S3.DeleteObject (T.pack $ bucketObject info k) (bucket info)
return $ either (const False) (const True) res
checkKey :: Remote -> S3Info -> Maybe S3Handle -> CheckPresent
@@ -271,7 +280,7 @@ checkKey r info (Just h) k = do
#endif
where
go = sendS3Handle h $
- S3.headObject (bucket info) (bucketObject info k)
+ S3.headObject (bucket info) (T.pack $ bucketObject info k)
#if ! MIN_VERSION_aws(0,10,0)
{- Catch exception headObject returns when an object is not present
@@ -285,7 +294,14 @@ checkKey r info (Just h) k = do
| AWS.headerErrorMessage e == "ETag missing" = Just ()
| otherwise = Nothing
#endif
-checkKey _r _info Nothing _k = error "TODO"
+
+checkKey r info Nothing k = case getpublicurl info of
+ Nothing -> do
+ warnMissingCredPairFor "S3" (AWS.creds $ uuid r)
+ error "No S3 credentials configured"
+ Just geturl -> do
+ showAction $ "checking " ++ name r
+ withUrlOptions $ checkBoth (geturl k) (keySize k)
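`checkBoth` is what makes the presence check meaningful without creds: it succeeds only if the public URL is reachable and, when the key records a size, the reported length agrees. A hedged standalone sketch of the same check, using the names from this hunk:

```haskell
-- Sketch only: presence check against the public URL, verifying both
-- existence and (when known) the key's size.
checkPublicPresence :: S3Info -> Key -> Annex Bool
checkPublicPresence info k = case getpublicurl info of
	Nothing     -> return False
	Just geturl -> withUrlOptions $ checkBoth (geturl k) (keySize k)
```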
{- Generate the bucket if it does not already exist, including creating the
- UUID file within the bucket.
@@ -385,11 +401,13 @@ withS3Handle :: RemoteConfig -> UUID -> (S3Handle -> Annex a) -> Annex a
withS3Handle c u a = do
withS3HandleMaybe c u $ \mh -> case mh of
Just h -> a h
- Nothing -> error "No S3 credentials configured"
+ Nothing -> do
+ warnMissingCredPairFor "S3" (AWS.creds u)
+ error "No S3 credentials configured"
withS3HandleMaybe :: RemoteConfig -> UUID -> (Maybe S3Handle -> Annex a) -> Annex a
withS3HandleMaybe c u a = do
- mcreds <- getRemoteCredPairFor "S3" c (AWS.creds u)
+ mcreds <- getRemoteCredPair c (AWS.creds u)
case mcreds of
Just creds -> do
awscreds <- liftIO $ genCredentials creds
@@ -427,11 +445,12 @@ tryS3 a = (Right <$> a) `catch` (pure . Left)
data S3Info = S3Info
{ bucket :: S3.Bucket
, storageClass :: S3.StorageClass
- , bucketObject :: Key -> T.Text
+ , bucketObject :: Key -> String
, metaHeaders :: [(T.Text, T.Text)]
, partSize :: Maybe Integer
, isIA :: Bool
- , acl :: Maybe S3.CannedAcl
+ , public :: Bool
+ , getpublicurl :: Maybe (Key -> URLString)
}
extractS3Info :: RemoteConfig -> Annex S3Info
@@ -440,17 +459,27 @@ extractS3Info c = do
(error "S3 bucket not configured")
(return . T.pack)
(getBucketName c)
- return $ S3Info
+ let info = S3Info
{ bucket = b
, storageClass = getStorageClass c
- , bucketObject = T.pack . getBucketObject c
+ , bucketObject = getBucketObject c
, metaHeaders = getMetaHeaders c
, partSize = getPartSize c
, isIA = configIA c
- , acl = case M.lookup "public" c of
- Just "yes" -> Just S3.AclPublicRead
- _ -> Nothing
+ , public = case M.lookup "public" c of
+ Just "yes" -> True
+ _ -> False
+ , getpublicurl = case M.lookup "publicurl" c of
+ Just u -> Just $ genericPublicKeyUrl info u
+ Nothing -> case M.lookup "host" c of
+ Just h
+ | h == AWS.s3DefaultHost ->
+ Just $ awsPublicKeyUrl info
+ | isIAHost h ->
+ Just $ iaPublicKeyUrl info
+ _ -> Nothing
}
+ return info
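The precedence here matters: an explicit `publicurl` always wins, and host-based detection only fills in the Amazon and Internet Archive cases. Summarized (host values are examples):

    publicurl set            -> genericPublicKeyUrl with that URL
    host is s3.amazonaws.com -> awsPublicKeyUrl
    host is *.archive.org    -> iaPublicKeyUrl
    anything else            -> no credential-less access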
putObject :: S3Info -> T.Text -> RequestBody -> S3.PutObject
putObject info file rbody = (S3.putObject (bucket info) file rbody)
@@ -460,6 +489,11 @@ putObject info file rbody = (S3.putObject (bucket info) file rbody)
, S3.poAcl = acl info
}
+acl :: S3Info -> Maybe S3.CannedAcl
+acl info
+ | public info = Just S3.AclPublicRead
+ | otherwise = Nothing
+
getBucketName :: RemoteConfig -> Maybe BucketName
getBucketName = map toLower <$$> M.lookup "bucket"
@@ -514,10 +548,16 @@ isIAHost h = ".archive.org" `isSuffixOf` map toLower h
iaItemUrl :: BucketName -> URLString
iaItemUrl b = "http://archive.org/details/" ++ b
-iaKeyUrl :: Remote -> Key -> URLString
-iaKeyUrl r k = "http://archive.org/download/" ++ b ++ "/" ++ getBucketObject (config r) k
- where
- b = fromMaybe "" $ getBucketName $ config r
+iaPublicKeyUrl :: S3Info -> Key -> URLString
+iaPublicKeyUrl info = genericPublicKeyUrl info $
+ "http://archive.org/download/" ++ T.unpack (bucket info) ++ "/"
+
+awsPublicKeyUrl :: S3Info -> Key -> URLString
+awsPublicKeyUrl info = genericPublicKeyUrl info $
+ "https://" ++ T.unpack (bucket info) ++ ".s3.amazonaws.com/"
+
+genericPublicKeyUrl :: S3Info -> URLString -> Key -> URLString
+genericPublicKeyUrl info baseurl k = baseurl ++ bucketObject info k
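Both specific generators are thin wrappers over `genericPublicKeyUrl`, which just appends the key's bucket object to a base URL, so the same building block works for any host. A hypothetical wrapper for a self-hosted mirror (the URL is an example only):

```haskell
-- Hypothetical: a public-URL generator for a custom mirror host,
-- built exactly like the IA and AWS wrappers above.
mirrorPublicKeyUrl :: S3Info -> Key -> URLString
mirrorPublicKeyUrl info = genericPublicKeyUrl info
	"https://mirror.example.com/annex/"
```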
genCredentials :: CredPair -> IO AWS.Credentials
genCredentials (keyid, secret) = AWS.Credentials
@@ -539,8 +579,8 @@ debugMapper level t = forward "S3" (T.unpack t)
AWS.Warning -> warningM
AWS.Error -> errorM
-s3Info :: RemoteConfig -> [(String, String)]
-s3Info c = catMaybes
+s3Info :: RemoteConfig -> S3Info -> [(String, String)]
+s3Info c info = catMaybes
[ Just ("bucket", fromMaybe "unknown" (getBucketName c))
, Just ("endpoint", w82s (S.unpack (S3.s3Endpoint s3c)))
, Just ("port", show (S3.s3Port s3c))
@@ -549,6 +589,7 @@ s3Info c = catMaybes
then Just ("internet archive item", iaItemUrl $ fromMaybe "unknown" $ getBucketName c)
else Nothing
, Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c))
+ , Just ("public", if public info then "yes" else "no")
]
where
s3c = s3Configuration c
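With the extra field, `git annex info <remote>` now reports whether the bucket is public; a hypothetical excerpt of the relevant lines (values assumed):

    bucket: mybucket
    partsize: unlimited
    public: yes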
diff --git a/debian/changelog b/debian/changelog
index e95c7bebf..01c689b50 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -14,6 +14,7 @@ git-annex (5.20150529) UNRELEASED; urgency=medium
it from the import location.
* S3: Special remotes can be configured with public=yes to allow
the public to access the bucket's content.
+ * S3: Publicly accessible buckets can be used without creds.
-- Joey Hess <id@joeyh.name> Sat, 30 May 2015 02:07:18 -0400
diff --git a/doc/special_remotes/S3.mdwn b/doc/special_remotes/S3.mdwn
index 2d9c6cfcd..33f0410bb 100644
--- a/doc/special_remotes/S3.mdwn
+++ b/doc/special_remotes/S3.mdwn
@@ -50,8 +50,12 @@ the S3 remote.
* `public` - Set to "yes" to allow public read access to files sent
to the S3 remote. This is accomplished by setting an ACL when each
- file is uploaded to the remote. So, it can be changed but changes
- will only affect subseqent uploads.
+ file is uploaded to the remote. So, changes to this setting will
+ only affect subsequent uploads.
+
+* `publicurl` - Configure the URL that is used to download files
+ from the bucket when they are publicly accessible; see the example below.
+ (This is automatically configured for Amazon S3 and the Internet Archive.)
* `partsize` - Amazon S3 only accepts uploads up to a certain file size,
and storing larger files requires a multipart upload process.
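For hosts other than Amazon S3 and the Internet Archive, `publicurl` has to be given explicitly; a hypothetical setup (host, bucket name, and URL are examples only):

    git annex initremote cloud type=S3 encryption=none host=s3.example.com bucket=mybucket public=yes publicurl=https://s3.example.com/mybucket/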
diff --git a/doc/tips/using_Amazon_S3.mdwn b/doc/tips/using_Amazon_S3.mdwn
index d6f621786..85d5bc958 100644
--- a/doc/tips/using_Amazon_S3.mdwn
+++ b/doc/tips/using_Amazon_S3.mdwn
@@ -22,16 +22,17 @@ Next, create the S3 remote, and describe it.
The configuration for the S3 remote is stored in git. So to make another
repository use the same S3 remote is easy:
- # cd /media/usb/annex
+ # export AWS_ACCESS_KEY_ID="08TJMT99S3511WOZEP91"
+ # export AWS_SECRET_ACCESS_KEY="s3kr1t"
# git pull laptop
# git annex enableremote cloud
enableremote cloud (gpg) (checking bucket) ok
-Now the remote can be used like any other remote.
+Notice that to enable an existing S3 remote, you have to provide the AWS
+credentials, because they are not stored in the repository. (git-annex can
+be configured to store them there, but that is not the default.)
- # git annex copy my_cool_big_file --to cloud
- copy my_cool_big_file (gpg) (checking cloud...) (to cloud...) ok
- # git annex move video/hackity_hack_and_kaxxt.mov --to cloud
- move video/hackity_hack_and_kaxxt.mov (checking cloud...) (to cloud...) ok
+See [[public_Amazon_S3_remote]] for how to set up an Amazon S3 remote that
+the public can use without needing AWS credentials.
-See [[special_remotes/S3]] for details.
+See [[special_remotes/S3]] for details about configuring S3 remotes.
diff --git a/doc/todo/credentials-less_access_to_s3.mdwn b/doc/todo/credentials-less_access_to_s3.mdwn
index 39835ac1f..6816b3ff7 100644
--- a/doc/todo/credentials-less_access_to_s3.mdwn
+++ b/doc/todo/credentials-less_access_to_s3.mdwn
@@ -9,3 +9,5 @@ Besides, you never know if and when the file really is available on s3, so runni
How hard would it be to fix that in the s3 remote?
Thanks! --[[anarcat]]
+
+> [[done]] --[[Joey]]
diff --git a/doc/todo/credentials-less_access_to_s3/comment_3_26de94e8e3fefc9b47d1510bfb2dac9b._comment b/doc/todo/credentials-less_access_to_s3/comment_3_26de94e8e3fefc9b47d1510bfb2dac9b._comment
new file mode 100644
index 000000000..8a9ff4db2
--- /dev/null
+++ b/doc/todo/credentials-less_access_to_s3/comment_3_26de94e8e3fefc9b47d1510bfb2dac9b._comment
@@ -0,0 +1,10 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 3"""
+ date="2015-06-05T20:17:38Z"
+ content="""
+The remote can indeed fall back when there are no creds.
+
+Also, git-annex can set an ACL on files it uploads, if the remote is
+configured with public=yes, so no manual ACL setting will be needed.
+"""]]
diff --git a/doc/todo/credentials-less_access_to_s3/comment_3_96c8bcb545578280dc02c00d82978f77._comment b/doc/todo/credentials-less_access_to_s3/comment_3_96c8bcb545578280dc02c00d82978f77._comment
deleted file mode 100644
index 52802de2a..000000000
--- a/doc/todo/credentials-less_access_to_s3/comment_3_96c8bcb545578280dc02c00d82978f77._comment
+++ /dev/null
@@ -1,11 +0,0 @@
-[[!comment format=mdwn
- username="joey"
- subject="""comment 3"""
- date="2015-06-05T17:28:52Z"
- content="""
-Based on
-<http://docs.aws.amazon.com/AmazonS3/latest/dev/WebsiteEndpoints.html>
-and my testing, S3 does not default to allowing public access to buckets. So,
-this seems like something that it makes sense for the user to
-manually configure when setting up a s3 remote.
-"""]]