-rw-r--r-- | Creds.hs                                                                                   | 32
-rw-r--r-- | Remote/S3.hs                                                                               | 97
-rw-r--r-- | debian/changelog                                                                           |  1
-rw-r--r-- | doc/special_remotes/S3.mdwn                                                                |  8
-rw-r--r-- | doc/tips/using_Amazon_S3.mdwn                                                              | 15
-rw-r--r-- | doc/todo/credentials-less_access_to_s3.mdwn                                                |  2
-rw-r--r-- | doc/todo/credentials-less_access_to_s3/comment_3_26de94e8e3fefc9b47d1510bfb2dac9b._comment | 10
-rw-r--r-- | doc/todo/credentials-less_access_to_s3/comment_3_96c8bcb545578280dc02c00d82978f77._comment | 11
8 files changed, 115 insertions, 61 deletions
diff --git a/Creds.hs b/Creds.hs
--- a/Creds.hs
+++ b/Creds.hs
@@ -9,8 +9,9 @@ module Creds (
 	module Types.Creds,
 	CredPairStorage(..),
 	setRemoteCredPair,
-	getRemoteCredPairFor,
 	getRemoteCredPair,
+	getRemoteCredPairFor,
+	warnMissingCredPairFor,
 	getEnvCredPair,
 	writeCacheCreds,
 	readCacheCreds,
@@ -74,18 +75,6 @@ setRemoteCredPair _ c storage (Just creds)
 {- Gets a remote's credpair, from the environment if set, otherwise
  - from the cache in gitAnnexCredsDir, or failing that, from the
  - value in RemoteConfig. -}
-getRemoteCredPairFor :: String -> RemoteConfig -> CredPairStorage -> Annex (Maybe CredPair)
-getRemoteCredPairFor this c storage = maybe missing (return . Just) =<< getRemoteCredPair c storage
-  where
-	(loginvar, passwordvar) = credPairEnvironment storage
-	missing = do
-		warning $ unwords
-			[ "Set both", loginvar
-			, "and", passwordvar
-			, "to use", this
-			]
-		return Nothing
-
 getRemoteCredPair :: RemoteConfig -> CredPairStorage -> Annex (Maybe CredPair)
 getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv
   where
@@ -122,6 +111,23 @@ getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv
 				return $ Just credpair
 			_ -> error "bad creds"
 
+getRemoteCredPairFor :: String -> RemoteConfig -> CredPairStorage -> Annex (Maybe CredPair)
+getRemoteCredPairFor this c storage = go =<< getRemoteCredPair c storage
+  where
+	go Nothing = do
+		warnMissingCredPairFor this storage
+		return Nothing
+	go (Just credpair) = return $ Just credpair
+
+warnMissingCredPairFor :: String -> CredPairStorage -> Annex ()
+warnMissingCredPairFor this storage = warning $ unwords
+	[ "Set both", loginvar
+	, "and", passwordvar
+	, "to use", this
+	]
+  where
+	(loginvar, passwordvar) = credPairEnvironment storage
+
 {- Gets a CredPair from the environment. -}
 getEnvCredPair :: CredPairStorage -> IO (Maybe CredPair)
 getEnvCredPair storage = liftM2 (,)
diff --git a/Remote/S3.hs b/Remote/S3.hs
index bcaf9d022..7b0cf5b23 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -45,6 +45,9 @@ import Annex.UUID
 import Logs.Web
 import Utility.Metered
 import Utility.DataUnits
+import Annex.Content
+import Annex.Url (withUrlOptions)
+import Utility.Url (checkBoth)
 
 type BucketName = String
 
@@ -64,7 +67,7 @@ gen r u c gc = do
   where
 	new cst info = Just $ specialRemote c
 		(prepareS3Handle this $ store this info)
-		(prepareS3HandleMaybe this $ retrieve info)
+		(prepareS3HandleMaybe this $ retrieve this info)
 		(prepareS3Handle this $ remove info)
 		(prepareS3HandleMaybe this $ checkKey this info)
 		this
@@ -90,7 +93,7 @@ gen r u c gc = do
 		, availability = GloballyAvailable
 		, remotetype = remote
 		, mkUnavailable = gen r u (M.insert "host" "!dne!" c) gc
-		, getInfo = includeCredsInfo c (AWS.creds u) (s3Info c)
+		, getInfo = includeCredsInfo c (AWS.creds u) (s3Info c info)
 		, claimUrl = Nothing
 		, checkUrl = Nothing
 		}
@@ -158,7 +161,7 @@ prepareS3HandleMaybe r = resourcePrepare $ const $
 	withS3HandleMaybe (config r) (uuid r)
 
 store :: Remote -> S3Info -> S3Handle -> Storer
-store r info h = fileStorer $ \k f p -> do
+store _r info h = fileStorer $ \k f p -> do
 	case partSize info of
 		Just partsz | partsz > 0 -> do
 			fsz <- liftIO $ getFileSize f
@@ -165,18 +168,18 @@ store r info h = fileStorer $ \k f p -> do
 			if fsz > partsz
 				then multipartupload fsz partsz k f p
 				else singlepartupload k f p
 		_ -> singlepartupload k f p
 	-- Store public URL to item in Internet Archive.
 	when (isIA info && not (isChunkKey k)) $
-		setUrlPresent webUUID k (iaKeyUrl r k)
+		setUrlPresent webUUID k (iaPublicKeyUrl info k)
 	return True
   where
 	singlepartupload k f p = do
 		rbody <- liftIO $ httpBodyStorer f p
-		void $ sendS3Handle h $ putObject info (bucketObject info k) rbody
+		void $ sendS3Handle h $ putObject info (T.pack $ bucketObject info k) rbody
 	multipartupload fsz partsz k f p = do
 #if MIN_VERSION_aws(0,10,6)
-		let object = bucketObject info k
+		let object = T.pack (bucketObject info k)
 		let startreq = (S3.postInitiateMultipartUpload
 			(bucket info) object)
 			{ S3.imuStorageClass = Just (storageClass info)
@@ -222,10 +225,10 @@ store r info h = fileStorer $ \k f p -> do
 {- Implemented as a fileRetriever, that uses conduit to stream the chunks
  - out to the file. Would be better to implement a byteRetriever, but
  - that is difficult. -}
-retrieve :: S3Info -> Maybe S3Handle -> Retriever
-retrieve info (Just h) = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
+retrieve :: Remote -> S3Info -> Maybe S3Handle -> Retriever
+retrieve _ info (Just h) = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
 	(fr, fh) <- allocate (openFile f WriteMode) hClose
-	let req = S3.getObject (bucket info) (bucketObject info k)
+	let req = S3.getObject (bucket info) (T.pack $ bucketObject info k)
 	S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle' h req
 	responseBody rsp $$+- sinkprogressfile fh p zeroBytesProcessed
 	release fr
@@ -240,7 +243,13 @@ retrieve info (Just h) = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
 			void $ meterupdate sofar'
 			S.hPut fh bs
 			sinkprogressfile fh meterupdate sofar'
-retrieve _info Nothing = error "TODO"
+retrieve r info Nothing = case getpublicurl info of
+	Nothing -> \_ _ _ -> do
+		warnMissingCredPairFor "S3" (AWS.creds $ uuid r)
+		return False
+	Just geturl -> fileRetriever $ \f k _p ->
+		unlessM (downloadUrl [geturl k] f) $
+			error "failed to download content"
 
 retrieveCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool
 retrieveCheap _ _ _ = return False
@@ -255,7 +264,7 @@ remove info h k
 		return False
 	| otherwise = do
 		res <- tryNonAsync $ sendS3Handle h $
-			S3.DeleteObject (bucketObject info k) (bucket info)
+			S3.DeleteObject (T.pack $ bucketObject info k) (bucket info)
 		return $ either (const False) (const True) res
 
 checkKey :: Remote -> S3Info -> Maybe S3Handle -> CheckPresent
@@ -271,7 +280,7 @@ checkKey r info (Just h) k = do
 #endif
   where
 	go = sendS3Handle h $
-		S3.headObject (bucket info) (bucketObject info k)
+		S3.headObject (bucket info) (T.pack $ bucketObject info k)
 
 #if ! MIN_VERSION_aws(0,10,0)
 {- Catch exception headObject returns when an object is not present
@@ -285,7 +294,14 @@ checkKey r info (Just h) k = do
 	| AWS.headerErrorMessage e == "ETag missing" = Just ()
 	| otherwise = Nothing
 #endif
-checkKey _r _info Nothing _k = error "TODO"
+
+checkKey r info Nothing k = case getpublicurl info of
+	Nothing -> do
+		warnMissingCredPairFor "S3" (AWS.creds $ uuid r)
+		error "No S3 credentials configured"
+	Just geturl -> do
+		showAction $ "checking " ++ name r
+		withUrlOptions $ checkBoth (geturl k) (keySize k)
 
 {- Generate the bucket if it does not already exist, including creating the
  - UUID file within the bucket.
@@ -385,11 +401,13 @@ withS3Handle :: RemoteConfig -> UUID -> (S3Handle -> Annex a) -> Annex a
 withS3Handle c u a = do
 	withS3HandleMaybe c u $ \mh -> case mh of
 		Just h -> a h
-		Nothing -> error "No S3 credentials configured"
+		Nothing -> do
+			warnMissingCredPairFor "S3" (AWS.creds u)
+			error "No S3 credentials configured"
 
 withS3HandleMaybe :: RemoteConfig -> UUID -> (Maybe S3Handle -> Annex a) -> Annex a
 withS3HandleMaybe c u a = do
-	mcreds <- getRemoteCredPairFor "S3" c (AWS.creds u)
+	mcreds <- getRemoteCredPair c (AWS.creds u)
 	case mcreds of
 		Just creds -> do
 			awscreds <- liftIO $ genCredentials creds
@@ -427,11 +445,12 @@ tryS3 a = (Right <$> a) `catch` (pure . Left)
 
 data S3Info = S3Info
 	{ bucket :: S3.Bucket
 	, storageClass :: S3.StorageClass
-	, bucketObject :: Key -> T.Text
+	, bucketObject :: Key -> String
 	, metaHeaders :: [(T.Text, T.Text)]
 	, partSize :: Maybe Integer
 	, isIA :: Bool
-	, acl :: Maybe S3.CannedAcl
+	, public :: Bool
+	, getpublicurl :: Maybe (Key -> URLString)
 	}
 
@@ -440,17 +459,27 @@ extractS3Info c = do
 		(error "S3 bucket not configured")
 		(return . T.pack)
 		(getBucketName c)
-	return $ S3Info
+	let info = S3Info
 		{ bucket = b
 		, storageClass = getStorageClass c
-		, bucketObject = T.pack . getBucketObject c
+		, bucketObject = getBucketObject c
 		, metaHeaders = getMetaHeaders c
 		, partSize = getPartSize c
 		, isIA = configIA c
-		, acl = case M.lookup "public" c of
-			Just "yes" -> Just S3.AclPublicRead
-			_ -> Nothing
+		, public = case M.lookup "public" c of
+			Just "yes" -> True
+			_ -> False
+		, getpublicurl = case M.lookup "publicurl" c of
+			Just u -> Just $ genericPublicKeyUrl info u
+			Nothing -> case M.lookup "host" c of
+				Just h
+					| h == AWS.s3DefaultHost ->
+						Just $ awsPublicKeyUrl info
+					| isIAHost h ->
+						Just $ iaPublicKeyUrl info
+				_ -> Nothing
 		}
+	return info
 
 putObject :: S3Info -> T.Text -> RequestBody -> S3.PutObject
 putObject info file rbody = (S3.putObject (bucket info) file rbody)
@@ -460,6 +489,11 @@ putObject info file rbody = (S3.putObject (bucket info) file rbody)
 	, S3.poAcl = acl info
 	}
 
+acl :: S3Info -> Maybe S3.CannedAcl
+acl info
+	| public info = Just S3.AclPublicRead
+	| otherwise = Nothing
+
 getBucketName :: RemoteConfig -> Maybe BucketName
 getBucketName = map toLower <$$> M.lookup "bucket"
 
@@ -514,10 +548,16 @@ isIAHost h = ".archive.org" `isSuffixOf` map toLower h
 iaItemUrl :: BucketName -> URLString
 iaItemUrl b = "http://archive.org/details/" ++ b
 
-iaKeyUrl :: Remote -> Key -> URLString
-iaKeyUrl r k = "http://archive.org/download/" ++ b ++ "/" ++ getBucketObject (config r) k
-  where
-	b = fromMaybe "" $ getBucketName $ config r
+iaPublicKeyUrl :: S3Info -> Key -> URLString
+iaPublicKeyUrl info = genericPublicKeyUrl info $
+	"http://archive.org/download/" ++ T.unpack (bucket info) ++ "/"
+
+awsPublicKeyUrl :: S3Info -> Key -> URLString
+awsPublicKeyUrl info = genericPublicKeyUrl info $
+	"https://" ++ T.unpack (bucket info) ++ ".s3.amazonaws.com/"
+
+genericPublicKeyUrl :: S3Info -> URLString -> Key -> URLString
+genericPublicKeyUrl info baseurl k = baseurl ++ bucketObject info k
 
 genCredentials :: CredPair -> IO AWS.Credentials
 genCredentials (keyid, secret) = AWS.Credentials
@@ -539,8 +579,8 @@ debugMapper level t = forward "S3" (T.unpack t)
 		AWS.Warning -> warningM
 		AWS.Error -> errorM
 
-s3Info :: RemoteConfig -> [(String, String)]
-s3Info c = catMaybes
+s3Info :: RemoteConfig -> S3Info -> [(String, String)]
+s3Info c info = catMaybes
 	[ Just ("bucket", fromMaybe "unknown" (getBucketName c))
("endpoint", w82s (S.unpack (S3.s3Endpoint s3c))) , Just ("port", show (S3.s3Port s3c)) @@ -549,6 +589,7 @@ s3Info c = catMaybes then Just ("internet archive item", iaItemUrl $ fromMaybe "unknown" $ getBucketName c) else Nothing , Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c)) + , Just ("public", if public info then "yes" else "no") ] where s3c = s3Configuration c diff --git a/debian/changelog b/debian/changelog index e95c7bebf..01c689b50 100644 --- a/debian/changelog +++ b/debian/changelog @@ -14,6 +14,7 @@ git-annex (5.20150529) UNRELEASED; urgency=medium it from the import location. * S3: Special remotes can be configured with public=yes to allow the public to access the bucket's content. + * S3: Publically accessible buckets can be used without creds. -- Joey Hess <id@joeyh.name> Sat, 30 May 2015 02:07:18 -0400 diff --git a/doc/special_remotes/S3.mdwn b/doc/special_remotes/S3.mdwn index 2d9c6cfcd..33f0410bb 100644 --- a/doc/special_remotes/S3.mdwn +++ b/doc/special_remotes/S3.mdwn @@ -50,8 +50,12 @@ the S3 remote. * `public` - Set to "yes" to allow public read access to files sent to the S3 remote. This is accomplished by setting an ACL when each - file is uploaded to the remote. So, it can be changed but changes - will only affect subseqent uploads. + file is uploaded to the remote. So, changes to this setting will + only affect subseqent uploads. + +* `publicurl` - Configure the URL that is used to download files + from the bucket when they are available publically. + (This is automatically configured for Amazon S3 and the Internet Archive.) * `partsize` - Amazon S3 only accepts uploads up to a certian file size, and storing larger files requires a multipart upload process. diff --git a/doc/tips/using_Amazon_S3.mdwn b/doc/tips/using_Amazon_S3.mdwn index d6f621786..85d5bc958 100644 --- a/doc/tips/using_Amazon_S3.mdwn +++ b/doc/tips/using_Amazon_S3.mdwn @@ -22,16 +22,17 @@ Next, create the S3 remote, and describe it. The configuration for the S3 remote is stored in git. So to make another repository use the same S3 remote is easy: - # cd /media/usb/annex + # export AWS_ACCESS_KEY_ID="08TJMT99S3511WOZEP91" + # export AWS_SECRET_ACCESS_KEY="s3kr1t" # git pull laptop # git annex enableremote cloud enableremote cloud (gpg) (checking bucket) ok -Now the remote can be used like any other remote. +Notice that to enable an existing S3 remote, you have to provide the Amazon +AWS credentials because they were not stored in the repository. (It is +possible to configure git-annex to do that, but not the default.) - # git annex copy my_cool_big_file --to cloud - copy my_cool_big_file (gpg) (checking cloud...) (to cloud...) ok - # git annex move video/hackity_hack_and_kaxxt.mov --to cloud - move video/hackity_hack_and_kaxxt.mov (checking cloud...) (to cloud...) ok +See [[public_Amazon_S3_remote]] for how to set up a Amazon S3 remote that +can be used by the public, without them needing AWS credentials. -See [[special_remotes/S3]] for details. +See [[special_remotes/S3]] for details about configuring S3 remotes. diff --git a/doc/todo/credentials-less_access_to_s3.mdwn b/doc/todo/credentials-less_access_to_s3.mdwn index 39835ac1f..6816b3ff7 100644 --- a/doc/todo/credentials-less_access_to_s3.mdwn +++ b/doc/todo/credentials-less_access_to_s3.mdwn @@ -9,3 +9,5 @@ Besides, you never know if and when the file really is available on s3, so runni How hard would it be to fix that in the s3 remote? Thanks! 
 --[[anarcat]]
+
+> [[done]] --[[Joey]]
diff --git a/doc/todo/credentials-less_access_to_s3/comment_3_26de94e8e3fefc9b47d1510bfb2dac9b._comment b/doc/todo/credentials-less_access_to_s3/comment_3_26de94e8e3fefc9b47d1510bfb2dac9b._comment
new file mode 100644
index 000000000..8a9ff4db2
--- /dev/null
+++ b/doc/todo/credentials-less_access_to_s3/comment_3_26de94e8e3fefc9b47d1510bfb2dac9b._comment
@@ -0,0 +1,10 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 3"""
+ date="2015-06-05T20:17:38Z"
+ content="""
+The remote can indeed fall back when there are no creds.
+
+Also, git-annex can set an ACL on files it uploads, if the remote is
+configured with public=yes, so no manual ACL setting will be needed.
+"""]]
diff --git a/doc/todo/credentials-less_access_to_s3/comment_3_96c8bcb545578280dc02c00d82978f77._comment b/doc/todo/credentials-less_access_to_s3/comment_3_96c8bcb545578280dc02c00d82978f77._comment
deleted file mode 100644
index 52802de2a..000000000
--- a/doc/todo/credentials-less_access_to_s3/comment_3_96c8bcb545578280dc02c00d82978f77._comment
+++ /dev/null
@@ -1,11 +0,0 @@
-[[!comment format=mdwn
- username="joey"
- subject="""comment 3"""
- date="2015-06-05T17:28:52Z"
- content="""
-Based on
-<http://docs.aws.amazon.com/AmazonS3/latest/dev/WebsiteEndpoints.html>
-and my testing, S3 does not default to allowing public access to buckets. So,
-this seems like something that it makes sense for the user to
-manually configure when setting up a s3 remote.
-"""]]
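
To make the new credentials-less path concrete, here is a small, self-contained Haskell sketch of the public-URL selection that the `extractS3Info` change above performs. This is not git-annex's actual API: `S3Config`, `cfgBucket`, `cfgHost`, `cfgPublicUrl`, and `publicKeyUrl` are hypothetical simplifications. The precedence matches the diff: an explicitly configured `publicurl` wins; otherwise a URL is derived only for the default Amazon S3 host or an Internet Archive host; with no public URL and no creds, the remote can only warn and fail.

	import Data.Char (toLower)
	import Data.List (isSuffixOf)

	type URLString = String

	-- Hypothetical, simplified stand-in for the parts of git-annex's
	-- RemoteConfig/S3Info that the URL selection needs.
	data S3Config = S3Config
	    { cfgBucket :: String          -- bucket name
	    , cfgHost :: String            -- configured S3 endpoint host
	    , cfgPublicUrl :: Maybe String -- the "publicurl" setting, if any
	    }

	-- Mirrors the getpublicurl field set up in extractS3Info: an explicit
	-- publicurl wins; otherwise a URL can only be generated for hosts the
	-- code recognizes (Amazon S3 itself, or the Internet Archive).
	publicKeyUrl :: S3Config -> Maybe (String -> URLString)
	publicKeyUrl c = case cfgPublicUrl c of
	    Just base -> Just (generic base)
	    Nothing
	        | cfgHost c == "s3.amazonaws.com" ->
	            Just (generic ("https://" ++ cfgBucket c ++ ".s3.amazonaws.com/"))
	        | ".archive.org" `isSuffixOf` map toLower (cfgHost c) ->
	            Just (generic ("http://archive.org/download/" ++ cfgBucket c ++ "/"))
	        | otherwise -> Nothing
	      where
	        -- Same shape as genericPublicKeyUrl: base URL ++ object name.
	        generic base obj = base ++ obj

	main :: IO ()
	main = case publicKeyUrl (S3Config "mybucket" "s3.amazonaws.com" Nothing) of
	    Just mkurl -> putStrLn (mkurl "SHA256E-s3--example.txt")
	    Nothing -> putStrLn "no public URL; S3 credentials are required"

With a selector like this in hand, credentials-less `retrieve` and `checkKey` reduce to a plain download and an existence/size check of the generated URL, which is what the `retrieve r info Nothing` and `checkKey r info Nothing k` branches in the diff do via `downloadUrl` and `checkBoth`.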