summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Annex.hs23
-rw-r--r--Command/Map.hs4
-rw-r--r--Config.hs68
-rw-r--r--Content.hs3
-rw-r--r--GitRepo.hs30
-rw-r--r--Makefile1
-rw-r--r--Remote.hs32
-rw-r--r--Remote/Git.hs (renamed from Remote/GitRemote.hs)106
-rw-r--r--Remote/S3.hs125
-rw-r--r--RemoteClass.hs5
-rw-r--r--Ssh.hs4
-rw-r--r--UUID.hs5
-rw-r--r--Version.hs3
-rw-r--r--debian/changelog8
-rw-r--r--doc/backends.mdwn7
-rw-r--r--doc/bugs/git-annex_has_issues_with_git_when_staging__47__commiting_logs.mdwn8
-rw-r--r--doc/git-annex.mdwn37
-rw-r--r--doc/internals.mdwn11
-rw-r--r--doc/special_remotes.mdwn9
-rw-r--r--doc/special_remotes/Amazon_S3.mdwn46
-rw-r--r--doc/todo/S3.mdwn2
-rw-r--r--doc/transferring_data.mdwn3
-rw-r--r--doc/use_case/Alice.mdwn6
-rw-r--r--doc/walkthrough.mdwn1
-rw-r--r--doc/walkthrough/moving_file_content_between_repositories.mdwn4
-rw-r--r--doc/walkthrough/using_Amazon_S3.mdwn29
26 files changed, 452 insertions, 128 deletions
diff --git a/Annex.hs b/Annex.hs
index bb26608f4..2723c6a00 100644
--- a/Annex.hs
+++ b/Annex.hs
@@ -17,12 +17,9 @@ module Annex (
queue,
queueRun,
queueRunAt,
- setConfig,
- repoConfig
) where
import Control.Monad.State
-import Data.Maybe
import qualified GitRepo as Git
import qualified GitQueue
@@ -119,23 +116,3 @@ queueRunAt n = do
state <- get
let q = repoqueue state
when (GitQueue.size q >= n) queueRun
-
-{- Changes a git config setting in both internal state and .git/config -}
-setConfig :: String -> String -> Annex ()
-setConfig k value = do
- g <- Annex.gitRepo
- liftIO $ Git.run g "config" [Param k, Param value]
- -- re-read git config and update the repo's state
- g' <- liftIO $ Git.configRead g
- Annex.changeState $ \s -> s { Annex.repo = g' }
-
-{- Looks up a per-remote config option in git config.
- - Failing that, tries looking for a global config option. -}
-repoConfig :: Git.Repo -> String -> String -> Annex String
-repoConfig r key def = do
- g <- Annex.gitRepo
- let def' = Git.configGet g global def
- return $ Git.configGet g local def'
- where
- local = "remote." ++ fromMaybe "" (Git.repoRemoteName r) ++ ".annex-" ++ key
- global = "annex." ++ key
diff --git a/Command/Map.hs b/Command/Map.hs
index 6206aeeb5..dc3acb56e 100644
--- a/Command/Map.hs
+++ b/Command/Map.hs
@@ -16,7 +16,7 @@ import Data.List.Utils
import Command
import qualified Annex
import qualified GitRepo as Git
-import qualified Remote.GitRemote
+import qualified Remote.Git
import Messages
import Types
import Utility
@@ -203,7 +203,7 @@ tryScan r
Git.hConfigRead r
configlist =
- Remote.GitRemote.onRemote r (pipedconfig, Nothing) "configlist" []
+ Remote.Git.onRemote r (pipedconfig, Nothing) "configlist" []
manualconfiglist = do
let sshcmd =
"cd " ++ shellEscape(Git.workTree r) ++ " && " ++
diff --git a/Config.hs b/Config.hs
new file mode 100644
index 000000000..c821364ce
--- /dev/null
+++ b/Config.hs
@@ -0,0 +1,68 @@
+{- Git configuration
+ -
+ - Copyright 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Config where
+
+import Data.Maybe
+import Control.Monad.State (liftIO)
+
+import qualified GitRepo as Git
+import qualified Annex
+import Types
+import Utility
+
+type ConfigKey = String
+
+{- Changes a git config setting in both internal state and .git/config -}
+setConfig :: ConfigKey -> String -> Annex ()
+setConfig k value = do
+ g <- Annex.gitRepo
+ liftIO $ Git.run g "config" [Param k, Param value]
+ -- re-read git config and update the repo's state
+ g' <- liftIO $ Git.configRead g
+ Annex.changeState $ \s -> s { Annex.repo = g' }
+
+{- Looks up a per-remote config setting in git config.
+ - Failing that, tries looking for a global config option. -}
+getConfig :: Git.Repo -> ConfigKey -> String -> Annex String
+getConfig r key def = do
+ g <- Annex.gitRepo
+ let def' = Git.configGet g ("annex." ++ key) def
+ return $ Git.configGet g (remoteConfig r key) def'
+
+remoteConfig :: Git.Repo -> ConfigKey -> String
+remoteConfig r key = "remote." ++ fromMaybe "" (Git.repoRemoteName r) ++ ".annex-" ++ key
+
+{- Calculates cost for a remote.
+ -
+ - The default cost is 100 for local repositories, and 200 for remote
+ - repositories; it can also be configured by remote.<name>.annex-cost
+ -}
+remoteCost :: Git.Repo -> Annex Int
+remoteCost r = do
+ c <- getConfig r "cost" ""
+ if not $ null c
+ then return $ read c
+ else if not $ Git.repoIsUrl r
+ then return 100
+ else return 200
+
+{- Checks if a repo should be ignored, based either on annex-ignore
+ - setting, or on command-line options. Allows command-line to override
+ - annex-ignore. -}
+remoteNotIgnored :: Git.Repo -> Annex Bool
+remoteNotIgnored r = do
+ ignored <- getConfig r "ignore" "false"
+ to <- match Annex.toremote
+ from <- match Annex.fromremote
+ if to || from
+ then return True
+ else return $ not $ Git.configTrue ignored
+ where
+ match a = do
+ n <- Annex.getState a
+ return $ n == Git.repoRemoteName r
diff --git a/Content.hs b/Content.hs
index 7aa30f7ff..88e8dbc00 100644
--- a/Content.hs
+++ b/Content.hs
@@ -40,6 +40,7 @@ import Utility
import StatFS
import Key
import DataUnits
+import Config
{- Checks if a given key is currently present in the gitAnnexLocation. -}
inAnnex :: Key -> Annex Bool
@@ -121,7 +122,7 @@ checkDiskSpace = checkDiskSpace' 0
checkDiskSpace' :: Integer -> Key -> Annex ()
checkDiskSpace' adjustment key = do
g <- Annex.gitRepo
- r <- Annex.repoConfig g "diskreserve" ""
+ r <- getConfig g "diskreserve" ""
let reserve = case readSize dataUnits r of
Nothing -> megabyte
Just v -> v
diff --git a/GitRepo.hs b/GitRepo.hs
index ad58b28a0..1b14e4a63 100644
--- a/GitRepo.hs
+++ b/GitRepo.hs
@@ -12,6 +12,7 @@ module GitRepo (
Repo,
repoFromCwd,
repoFromAbsPath,
+ repoFromUnknown,
repoFromUrl,
localToUrl,
repoIsUrl,
@@ -41,6 +42,7 @@ module GitRepo (
remotes,
remotesAdd,
repoRemoteName,
+ repoRemoteNameSet,
inRepo,
notInRepo,
stagedFiles,
@@ -81,7 +83,7 @@ import Utility
{- There are two types of repositories; those on local disk and those
- accessed via an URL. -}
-data RepoLocation = Dir FilePath | Url URI
+data RepoLocation = Dir FilePath | Url URI | Unknown
deriving (Show, Eq)
data Repo = Repo {
@@ -123,6 +125,10 @@ repoFromUrl url
Just v -> v
Nothing -> error $ "bad url " ++ url
+{- Creates a repo that has an unknown location. -}
+repoFromUnknown :: Repo
+repoFromUnknown = newFrom Unknown
+
{- Converts a Local Repo into a remote repo, using the reference repo
- which is assumed to be on the same host. -}
localToUrl :: Repo -> Repo -> Repo
@@ -141,11 +147,13 @@ repoDescribe :: Repo -> String
repoDescribe Repo { remoteName = Just name } = name
repoDescribe Repo { location = Url url } = show url
repoDescribe Repo { location = Dir dir } = dir
+repoDescribe Repo { location = Unknown } = "UNKNOWN"
{- Location of the repo, either as a path or url. -}
repoLocation :: Repo -> String
repoLocation Repo { location = Url url } = show url
repoLocation Repo { location = Dir dir } = dir
+repoLocation Repo { location = Unknown } = undefined
{- Constructs and returns an updated version of a repo with
- different remotes list. -}
@@ -158,6 +166,14 @@ repoRemoteName :: Repo -> Maybe String
repoRemoteName Repo { remoteName = Just name } = Just name
repoRemoteName _ = Nothing
+{- Sets the name of a remote based on the git config key, such as
+ "remote.foo.url". -}
+repoRemoteNameSet :: Repo -> String -> Repo
+repoRemoteNameSet r k = r { remoteName = Just basename }
+ where
+ basename = join "." $ reverse $ drop 1 $
+ reverse $ drop 1 $ split "." k
+
{- Some code needs to vary between URL and normal repos,
- or bare and non-bare, these functions help with that. -}
repoIsUrl :: Repo -> Bool
@@ -218,6 +234,7 @@ gitDir repo
workTree :: Repo -> FilePath
workTree r@(Repo { location = Url _ }) = urlPath r
workTree (Repo { location = Dir d }) = d
+workTree Repo { location = Unknown } = undefined
{- Given a relative or absolute filename in a repository, calculates the
- name to use to refer to the file relative to a git repository's top.
@@ -393,10 +410,6 @@ configStore repo s = do
where
r = repo { config = configParse s }
-{- Checks if a string from git config is a true value. -}
-configTrue :: String -> Bool
-configTrue s = map toLower s == "true"
-
{- Calculates a list of a repo's configured remotes, by parsing its config. -}
configRemotes :: Repo -> IO [Repo]
configRemotes repo = mapM construct remotepairs
@@ -404,10 +417,9 @@ configRemotes repo = mapM construct remotepairs
remotepairs = Map.toList $ filterremotes $ config repo
filterremotes = Map.filterWithKey (\k _ -> isremote k)
isremote k = startswith "remote." k && endswith ".url" k
- remotename k = join "." $ reverse $ drop 1 $ reverse $ drop 1 $ split "." k
construct (k,v) = do
r <- gen v
- return $ r { remoteName = Just $ remotename k }
+ return $ repoRemoteNameSet r k
gen v | scpstyle v = repoFromUrl $ scptourl v
| isURI v = repoFromUrl v
| otherwise = repoFromRemotePath v repo
@@ -423,6 +435,10 @@ configRemotes repo = mapM construct remotepairs
| d !! 0 == '~' = '/':dir
| otherwise = "/~/" ++ dir
+{- Checks if a string from git config is a true value. -}
+configTrue :: String -> Bool
+configTrue s = map toLower s == "true"
+
{- Parses git config --list output into a config map. -}
configParse :: String -> Map.Map String String
configParse s = Map.fromList $ map pair $ lines s
diff --git a/Makefile b/Makefile
index e1aaf8ec3..8e1664503 100644
--- a/Makefile
+++ b/Makefile
@@ -58,6 +58,7 @@ docs: $(mans)
--no-usedirs --disable-plugin=openid --plugin=sidebar \
--underlaydir=/dev/null --disable-plugin=shortcut \
--disable-plugin=smiley \
+ --plugin=comments --set comments_pagespec="*" \
--exclude='news/.*'
clean:
diff --git a/Remote.hs b/Remote.hs
index a7136ea65..6aab4a741 100644
--- a/Remote.hs
+++ b/Remote.hs
@@ -25,18 +25,37 @@ module Remote (
import Control.Monad.State (liftIO)
import Control.Monad (when, liftM)
import Data.List
+import Data.String.Utils
import RemoteClass
-import qualified Remote.GitRemote
+import qualified Remote.Git
+import qualified Remote.S3
import Types
import UUID
import qualified Annex
import Trust
import LocationLog
-
-{- add generators for new Remotes here -}
-generators :: [Annex [Remote Annex]]
-generators = [Remote.GitRemote.generate]
+import Messages
+
+{- Add generators for new Remotes here. -}
+generators :: [Annex (RemoteGenerator Annex)]
+generators =
+ [ Remote.Git.generate
+ , Remote.S3.generate
+ ]
+
+{- Runs a list of generators. -}
+runGenerators :: [Annex (RemoteGenerator Annex)] -> Annex [Remote Annex]
+runGenerators gs = do
+ (actions, expensive) <- collect ([], []) gs
+ when (not $ null expensive) $
+ showNote $ "getting UUID for " ++ join ", " expensive
+ sequence actions
+ where
+ collect v [] = return v
+ collect (actions, expensive) (x:xs) = do
+ (a, e) <- x
+ collect (a++actions, e++expensive) xs
{- Builds a list of all available Remotes.
- Since doing so can be expensive, the list is cached in the Annex. -}
@@ -45,8 +64,7 @@ genList = do
rs <- Annex.getState Annex.remotes
if null rs
then do
- lists <- sequence generators
- let rs' = concat lists
+ rs' <- runGenerators generators
Annex.changeState $ \s -> s { Annex.remotes = rs' }
return rs'
else return rs
diff --git a/Remote/GitRemote.hs b/Remote/Git.hs
index 43e75b97b..9021a2230 100644
--- a/Remote/GitRemote.hs
+++ b/Remote/Git.hs
@@ -5,7 +5,7 @@
- Licensed under the GNU GPL version 3 or higher.
-}
-module Remote.GitRemote (
+module Remote.Git (
generate,
onRemote
) where
@@ -13,9 +13,8 @@ module Remote.GitRemote (
import Control.Exception.Extensible
import Control.Monad.State (liftIO)
import qualified Data.Map as Map
-import Data.String.Utils
import System.Cmd.Utils
-import Control.Monad (unless, filterM)
+import Control.Monad (filterM, liftM)
import RemoteClass
import Types
@@ -29,18 +28,34 @@ import Messages
import CopyFile
import RsyncFile
import Ssh
+import Config
-generate :: Annex [Remote Annex]
+generate :: Annex (RemoteGenerator Annex)
generate = do
- readConfigs
g <- Annex.gitRepo
- rs <- filterM repoNotIgnored (Git.remotes g)
- mapM genRemote rs
+ allremotes <- filterM remoteNotIgnored $ Git.remotes g
+
+ {- It's assumed to be cheap to read the config of non-URL remotes,
+ - so this is done each time git-annex is run. Conversely,
+ - the config of an URL remote is only read when there is no
+ - cached UUID value. -}
+ let cheap = filter (not . Git.repoIsUrl) allremotes
+ let expensive = filter Git.repoIsUrl allremotes
+ expensive_todo <- filterM cachedUUID expensive
+ let skip = filter (`notElem` expensive_todo) expensive
+ let todo = cheap++expensive_todo
+
+ let actions = map genRemote skip ++
+ map (\r -> genRemote =<< tryGitConfigRead r) todo
+ return (actions, map Git.repoDescribe expensive_todo)
+
+ where
+ cachedUUID r = liftM null $ getUUID r
genRemote :: Git.Repo -> Annex (Remote Annex)
genRemote r = do
u <- getUUID r
- c <- repoCost r
+ c <- remoteCost r
return Remote {
uuid = u,
cost = c,
@@ -52,40 +67,13 @@ genRemote r = do
hasKeyCheap = not (Git.repoIsUrl r)
}
-{- Reads the configs of git remotes.
- -
- - It's assumed to be cheap to read the config of non-URL remotes,
- - so this is done each time git-annex is run. Conversely,
- - the config of an URL remote is only read when there is no
- - cached UUID value.
- -}
-readConfigs :: Annex ()
-readConfigs = do
- g <- Annex.gitRepo
- allremotes <- filterM repoNotIgnored $ Git.remotes g
- let cheap = filter (not . Git.repoIsUrl) allremotes
- let expensive = filter Git.repoIsUrl allremotes
- doexpensive <- filterM cachedUUID expensive
- unless (null doexpensive) $
- showNote $ "getting UUID for " ++
- list doexpensive ++ "..."
- let todo = cheap ++ doexpensive
- unless (null todo) $ do
- mapM_ tryGitConfigRead todo
- where
- cachedUUID r = do
- u <- getUUID r
- return $ null u
-
-{- The git configs for the git repo's remotes is not read on startup
- - because reading it may be expensive. This function tries to read the
- - config for a specified remote, and updates state. If successful, it
- - returns the updated git repo. -}
-tryGitConfigRead :: Git.Repo -> Annex (Either Git.Repo Git.Repo)
+{- Tries to read the config for a specified remote, updates state, and
+ - returns the updated repo. -}
+tryGitConfigRead :: Git.Repo -> Annex Git.Repo
tryGitConfigRead r
- | not $ Map.null $ Git.configMap r = return $ Right r -- already read
+ | not $ Map.null $ Git.configMap r = return r -- already read
| Git.repoIsSsh r = store $ onRemote r (pipedconfig, r) "configlist" []
- | Git.repoIsUrl r = return $ Left r
+ | Git.repoIsUrl r = return r
| otherwise = store $ safely $ Git.configRead r
where
-- Reading config can fail due to IO error or
@@ -104,43 +92,13 @@ tryGitConfigRead r
let l = Git.remotes g
let g' = Git.remotesAdd g $ exchange l r'
Annex.changeState $ \s -> s { Annex.repo = g' }
- return $ Right r'
+ return r'
exchange [] _ = []
exchange (old:ls) new =
if Git.repoRemoteName old == Git.repoRemoteName new
then new : exchange ls new
else old : exchange ls new
-{- Calculates cost for a repo.
- -
- - The default cost is 100 for local repositories, and 200 for remote
- - repositories; it can also be configured by remote.<name>.annex-cost
- -}
-repoCost :: Git.Repo -> Annex Int
-repoCost r = do
- c <- Annex.repoConfig r "cost" ""
- if not $ null c
- then return $ read c
- else if Git.repoIsUrl r
- then return 200
- else return 100
-
-{- Checks if a repo should be ignored, based either on annex-ignore
- - setting, or on command-line options. Allows command-line to override
- - annex-ignore. -}
-repoNotIgnored :: Git.Repo -> Annex Bool
-repoNotIgnored r = do
- ignored <- Annex.repoConfig r "ignore" "false"
- to <- match Annex.toremote
- from <- match Annex.fromremote
- if to || from
- then return True
- else return $ not $ Git.configTrue ignored
- where
- match a = do
- n <- Annex.getState a
- return $ n == Git.repoRemoteName r
-
{- Checks if a given remote has the content for a key inAnnex.
- If the remote cannot be accessed, returns a Left error.
-}
@@ -219,7 +177,7 @@ rsyncParams r sending key file = do
]
-- Convert the ssh command into rsync command line.
let eparam = rsyncShell (Param shellcmd:shellparams)
- o <- Annex.repoConfig r "rsync-options" ""
+ o <- getConfig r "rsync-options" ""
let base = options ++ map Param (words o) ++ eparam
if sending
then return $ base ++ [dummy, File file]
@@ -262,7 +220,3 @@ git_annex_shell r command params
shellopts = (Param command):(File dir):params
sshcmd = shellcmd ++ " " ++
unwords (map shellEscape $ toCommand shellopts)
-
-{- Human visible list of remotes. -}
-list :: [Git.Repo] -> String
-list remotes = join ", " $ map Git.repoDescribe remotes
diff --git a/Remote/S3.hs b/Remote/S3.hs
new file mode 100644
index 000000000..23ec33bb5
--- /dev/null
+++ b/Remote/S3.hs
@@ -0,0 +1,125 @@
+{- Amazon S3 remotes.
+ -
+ - Copyright 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Remote.S3 (generate) where
+
+import Network.AWS.AWSConnection
+import Network.AWS.S3Object
+import Network.AWS.S3Bucket
+import Network.AWS.AWSResult
+import qualified Data.ByteString.Lazy.Char8 as L
+import qualified Data.Map as Map
+import Data.String.Utils
+import Control.Monad (filterM, liftM, when)
+import Control.Monad.State (liftIO)
+import System.Environment
+import Data.Char
+import Messages
+
+import RemoteClass
+import Types
+import qualified GitRepo as Git
+import qualified Annex
+import UUID
+import Config
+
+generate :: Annex (RemoteGenerator Annex)
+generate = do
+ g <- Annex.gitRepo
+ remotes <- filterM remoteNotIgnored $ findS3Remotes g
+ todo <- filterM cachedUUID remotes
+ let ok = filter (`notElem` todo) remotes
+
+ let actions = map (\r -> genRemote r =<< getUUID r) ok ++
+ map (\r -> genRemote r =<< getS3UUID r) todo
+ return (actions, map Git.repoDescribe todo)
+
+ where
+ cachedUUID r = liftM null $ getUUID r
+
+{- S3 remotes have a remote.<name>.annex-s3-bucket config setting.
+ - Git.Repo does not normally generate remotes for things that
+ - have no configured url, so the Git.Repo objects have to be
+ - constructed as coming from an unknown location. -}
+findS3Remotes :: Git.Repo -> [Git.Repo]
+findS3Remotes r = map construct remotepairs
+ where
+ remotepairs = Map.toList $ filterremotes $ Git.configMap r
+ filterremotes = Map.filterWithKey (\k _ -> s3remote k)
+ construct (k,_) = Git.repoRemoteNameSet Git.repoFromUnknown k
+ s3remote k = startswith "remote." k && endswith ".annex-s3-bucket" k
+
+genRemote :: Git.Repo -> UUID -> Annex (Remote Annex)
+genRemote r u = do
+ c <- remoteCost r
+ return Remote {
+ uuid = u,
+ cost = c,
+ name = Git.repoDescribe r,
+ storeKey = error "TODO",
+ retrieveKeyFile = error "TODO",
+ removeKey = error "TODO",
+ hasKey = error "TODO",
+ hasKeyCheap = False
+ }
+
+s3Connection :: Git.Repo -> Annex (Maybe AWSConnection)
+s3Connection r = do
+ host <- getS3Config r "s3-host" (Just defaultAmazonS3Host)
+ port <- getS3Config r "s3-port" (Just $ show defaultAmazonS3Port)
+ accesskey <- getS3Config r "s3-access-key-id" Nothing
+ secretkey <- getS3Config r "s3-secret-access-key" Nothing
+ case reads port of
+ [(p, _)] -> return $ Just $ AWSConnection host p accesskey secretkey
+ _ -> error $ "bad S3 port value: " ++ port
+
+withS3Connection :: Git.Repo -> Annex a -> ((AWSConnection, String) -> Annex a) -> Annex a
+withS3Connection r def a = do
+ c <- s3Connection r
+ case c of
+ Nothing -> def
+ Just c' -> do
+ b <- getConfig r "s3-bucket" ""
+ a (c', b)
+
+getS3Config :: Git.Repo -> String -> Maybe String-> Annex String
+getS3Config r s def = do
+ e <- liftIO $ catch (liftM Just $ getEnv envvar) (const $ return def)
+ v <- case e of
+ Nothing -> getConfig r s ""
+ Just d -> getConfig r s d
+ when (null v) $ error $ "set " ++ envvar ++ " or " ++ remoteConfig r s
+ return v
+ where
+ envvar = "ANNEX_" ++ map (\c -> if c == '-' then '_' else toUpper c) s
+
+{- The UUID of a S3 bucket is stored in a file "git-annex-uuid" in the
+ - bucket. Gets the UUID, or if there is none, sets a new UUID, possibly
+ - also creating the bucket. -}
+getS3UUID :: Git.Repo -> Annex UUID
+getS3UUID r = withS3Connection r disable $ \(c, b) -> do
+ res <- liftIO $
+ getObject c $ S3Object b uuidfile "" [] L.empty
+ case res of
+ Right o -> return $ L.unpack $ obj_data o
+ Left _ -> do
+ location <- getS3Config r "s3-datacenter" (Just "EU")
+ -- bucket may already exist, or not
+ _ <- liftIO $ createBucketIn c b location
+ u <- getUUID r
+ res' <- liftIO $ sendObject c $
+ S3Object b uuidfile "" [] $
+ L.pack u
+ case res' of
+ Right _ -> return u
+ Left e -> do
+ warning $ prettyReqError e
+ disable
+
+ where
+ uuidfile = "git-annex-uuid"
+ disable = return "" -- empty uuid will disable this remote
diff --git a/RemoteClass.hs b/RemoteClass.hs
index 38e8407a5..eb4a01748 100644
--- a/RemoteClass.hs
+++ b/RemoteClass.hs
@@ -13,6 +13,11 @@ import Control.Exception
import Key
+{- A remote generator identifies configured remotes, and returns an action
+ - that can be run to set up each remote, and a list of names of remotes
+ - that are not cheap to set up. -}
+type RemoteGenerator a = ([a (Remote a)], [String])
+
data Remote a = Remote {
-- each Remote has a unique uuid
uuid :: String,
diff --git a/Ssh.hs b/Ssh.hs
index 04cd9bec8..6d01a5642 100644
--- a/Ssh.hs
+++ b/Ssh.hs
@@ -7,17 +7,17 @@
module Ssh where
-import qualified Annex
import qualified GitRepo as Git
import Utility
import Types
+import Config
{- Generates parameters to ssh to a repository's host and run a command.
- Caller is responsible for doing any neccessary shellEscaping of the
- passed command. -}
sshToRepo :: Git.Repo -> [CommandParam] -> Annex [CommandParam]
sshToRepo repo sshcmd = do
- s <- Annex.repoConfig repo "ssh-options" ""
+ s <- getConfig repo "ssh-options" ""
let sshoptions = map Param (words s)
let sshport = case Git.urlPort repo of
Nothing -> []
diff --git a/UUID.hs b/UUID.hs
index 5caf11045..eb1fb319c 100644
--- a/UUID.hs
+++ b/UUID.hs
@@ -35,6 +35,7 @@ import Locations
import qualified Annex
import Utility
import qualified SysConfig
+import Config
type UUID = String
@@ -69,7 +70,7 @@ getUUID r = do
else return c
where
cached g = Git.configGet g cachekey ""
- updatecache g u = when (g /= r) $ Annex.setConfig cachekey u
+ updatecache g u = when (g /= r) $ setConfig cachekey u
cachekey = "remote." ++ fromMaybe "" (Git.repoRemoteName r) ++ ".annex-uuid"
getUncachedUUID :: Git.Repo -> UUID
@@ -82,7 +83,7 @@ prepUUID = do
u <- getUUID g
when ("" == u) $ do
uuid <- liftIO $ genUUID
- Annex.setConfig configkey uuid
+ setConfig configkey uuid
{- Pretty-prints a list of UUIDs -}
prettyPrintUUIDs :: [UUID] -> Annex String
diff --git a/Version.hs b/Version.hs
index d061a2eab..947af8cef 100644
--- a/Version.hs
+++ b/Version.hs
@@ -15,6 +15,7 @@ import Types
import qualified Annex
import qualified GitRepo as Git
import Locations
+import Config
type Version = String
@@ -54,7 +55,7 @@ getVersion = do
return defaultVersion
setVersion :: Annex ()
-setVersion = Annex.setConfig versionField defaultVersion
+setVersion = setConfig versionField defaultVersion
checkVersion :: Annex ()
checkVersion = do
diff --git a/debian/changelog b/debian/changelog
index 7251ab665..825382256 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,11 @@
+git-annex (0.20110329) UNRELEASED; urgency=low
+
+ * Amazon S3 is now supported as a special type of remote.
+ Warning: Encrypting data before sending it to S3 is not currently
+ supported.
+
+ -- Joey Hess <joeyh@debian.org> Sat, 26 Mar 2011 14:36:16 -0400
+
git-annex (0.20110328) experimental; urgency=low
* annex.diskreserve can be given in arbitrary units (ie "0.5 gigabytes")
diff --git a/doc/backends.mdwn b/doc/backends.mdwn
index 22164850a..b0a2c882a 100644
--- a/doc/backends.mdwn
+++ b/doc/backends.mdwn
@@ -9,6 +9,10 @@ to retrieve the file's content (its value).
Multiple pluggable backends are supported, and a single repository
can use different backends for different files.
+These backends can transfer file contents in configured git remotes.
+It's also possible to use [[special_remotes]], such as Amazon S3 with
+these backends.
+
* `WORM` ("Write Once, Read Many") This backend assumes that any file with
the same basename, size, and modification time has the same content. So with
this backend, files can be moved around, but should never be added to
@@ -19,6 +23,9 @@ can use different backends for different files.
* `SHA512`, `SHA384`, `SHA256`, `SHA224` -- Like SHA1, but larger
checksums. Mostly useful for the very paranoid, or anyone who is
researching checksum collisions and wants to annex their colliding data. ;)
+
+These backends store file contents in other key/value stores.
+
* `URL` -- This backend downloads the file's content from an external URL.
The `annex.backends` git-config setting can be used to list the backends
diff --git a/doc/bugs/git-annex_has_issues_with_git_when_staging__47__commiting_logs.mdwn b/doc/bugs/git-annex_has_issues_with_git_when_staging__47__commiting_logs.mdwn
index 554cfa41e..774ca6a16 100644
--- a/doc/bugs/git-annex_has_issues_with_git_when_staging__47__commiting_logs.mdwn
+++ b/doc/bugs/git-annex_has_issues_with_git_when_staging__47__commiting_logs.mdwn
@@ -7,7 +7,13 @@ For now it's just a bit of extra work for me when it does occur but it does not
> What do you mean when you say that git "got wedged"? It hung somehow?
>
> If git-annex runs concurrently with another git command that locks
-> the repository its git add of log files can fail.
+> the repository, its git add of log files can fail.
+>
+> Update: Also, of course, if you are running a "got annex get" or
+> similar, and ctrl-c it after it has gotten some files, it can
+> end up with unstaged or in some cases un-added log files that git-annex
+> wrote -- since git-annex only stages log files in git on shutdown, and
+> ctrl-c bypasses that.
> --[[Joey]]
>> It "got wedged" as in git doesn't let me commit anything, even though it tells me that there is stuff to be committed in the staging area.
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 8afe93c10..3985addc6 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -132,6 +132,22 @@ Many git-annex commands will stage changes for later `git commit` by you.
by uuid. To change the description of the current repository, use
"."
+* s3bucket name gpgkey [datacenter host port]
+
+ Create or updates the key of a bucket in Amazon S3. The bucket's
+ name can be used to configure git remote using the bucket.
+
+ The gpgkey is a value that can be looked up (using gpg -k) to
+ find a gpg encryption key that will be given access to the bucket.
+ To disable encryption, specify "unencrypted". Note that additional gpg
+ keys can be given access to a bucket by running s3bucket on an existing
+ bucket, with a new key.
+
+ The datacenter defaults to "US". Other values include "EU",
+ "us-west-1", and "ap-southeast-1".
+
+ To use a different, S3-compatable service, specify a host and port.
+
* fsck [path ...]
With no parameters, this command checks the whole annex for consistency,
@@ -387,6 +403,25 @@ Here are all the supported configuration settings.
Default ssh and rsync options to use if a remote does not have
specific options.
+* `remote.<name>.annex-s3-access-key-id`
+
+ Your S3 Access Key ID. Does not need to be kept private.
+ If not set, the environment variable `AWS_ACCESS_KEY_ID`
+ will be used.
+
+* `remote.<name>.annex-s3-secret-access-key`
+
+ Your S3 Secret Access Key. This is a password.
+ If not set, the environment variable `AWS_SECRET_ACCESS_KEY`
+ will be used.
+
+* `remote.<name>.annex-s3-storageclass`
+
+ Storage class to use when adding new content to S3. The default
+ is "STANDARD". If you have configured git-annex to preserve
+ multiple [[copies]], consider setting this to "REDUCED_REDUNDANCY"
+ to save money.
+
* `annex.diskreserve`
Amount of disk space to reserve. Disk space is checked when transferring
@@ -401,6 +436,8 @@ Here are all the supported configuration settings.
Automatically maintained, and used to automate upgrades between versions.
+
+
# CONFIGURATION VIA .gitattributes
The backend used when adding a new file to the annex can be configured
diff --git a/doc/internals.mdwn b/doc/internals.mdwn
index a133320b4..55b1045a1 100644
--- a/doc/internals.mdwn
+++ b/doc/internals.mdwn
@@ -30,6 +30,17 @@ space and then the description through to the end of the line. Example:
e605dca6-446a-11e0-8b2a-002170d25c55 laptop
26339d22-446b-11e0-9101-002170d25c55 usb disk
+## `git-annex/s3.log`
+
+Associates the UUIDs of Amazon S3 buckets with a bucket nickname and connection
+information. Example:
+
+ be72acb8-5901-11e0-b600-002170d25c55 mybucket s3.amazonaws.com 80
+
+Note that the actual bucket name used on S3 in the above example
+is "mybucket-be72acb8-5901-11e0-b600-002170d25c55". The UUID is included
+in the bucket name to ensure it is globally unique.
+
## `.git-annex/trust.log`
Records the [[trust]] information for repositories. Does not exist unless
diff --git a/doc/special_remotes.mdwn b/doc/special_remotes.mdwn
new file mode 100644
index 000000000..651b24afa
--- /dev/null
+++ b/doc/special_remotes.mdwn
@@ -0,0 +1,9 @@
+Most [[backends]] can transfer data to and from configured git remotes.
+Normally those remotes are normal git repositories (bare and non-bare),
+that store the file contents in their own git annex directory.
+
+But, git-annex also extends git's concept of remotes, with these special
+types of remotes. These can be used just like any normal remote by git-annex.
+They cannot be used by other git commands though.
+
+* [[Amazon_S3]]
diff --git a/doc/special_remotes/Amazon_S3.mdwn b/doc/special_remotes/Amazon_S3.mdwn
new file mode 100644
index 000000000..67bea3b1c
--- /dev/null
+++ b/doc/special_remotes/Amazon_S3.mdwn
@@ -0,0 +1,46 @@
+This special remote type stores file contents in a bucket in Amazon S3
+or a similar service.
+
+See [[walkthrough/using_Amazon_S3]] for usage examples.
+
+## bucket names
+
+When `git annex s3bucket` is used to create a new bucket, it generates a
+UUID, and the name of the bucket includes that UUID, as well as the name
+specified by the user. This makes for some unweidly bucket names, but
+since S3 requires that bucket names be globally unique, it avoids needing
+to hunt for a unused bucket name.
+
+## data security
+
+When `git annex s3bucket` is used to create an unencrypted bucket,
+there is **no** protection against your data being read as it is sent
+to/from S3, or by Amazon when it is stored in S3. This should only be used
+for public data.
+
+** Encryption is not yet supported. **
+
+When an encrypted bucket is created, all files stored in the bucket are
+encrypted with gpg. Additionally, the filenames themselves are hashed
+to obfuscate them. The size of the encrypted files, and access patterns of
+the data, should be the only clues to what type of data you are storing in
+S3.
+
+[[!template id=note text="""
+This scheme was originally developed by Lars Wirzenius at al
+[for Obnam](http://braawi.org/obnam/encryption/).
+"""]]
+The data stored in S3 is encrypted by gpg with a symmetric cipher. The
+passphrase of the cipher is itself checked into your git repository,
+encrypted using one or more gpg public keys. This scheme allows new private
+keys to be given access to a bucket's content, after the bucket is created
+and is in use. It also allows revoking compromised private keys without
+having to throw out the contents of the bucket. The symmetric cipher
+is also hashed together with filenames used in the bucket, obfuscate
+the filenames.
+
+To add a new gpg key to an existing bucket, just re-run `git annex
+s3bucket`, specifying the new key id. For example:
+
+ # git annex s3bucket mybucket 16D0B8EF
+ s3bucket (adding gpg key 16D0B8EF) ok
diff --git a/doc/todo/S3.mdwn b/doc/todo/S3.mdwn
index 09a64f1a7..356b2af2e 100644
--- a/doc/todo/S3.mdwn
+++ b/doc/todo/S3.mdwn
@@ -1,3 +1,5 @@
+[[done]]
+
Support Amazon S3 as a file storage backend.
There's a haskell library that looks good. Not yet in Debian.
diff --git a/doc/transferring_data.mdwn b/doc/transferring_data.mdwn
index 9526a3e48..f6ae9bfcd 100644
--- a/doc/transferring_data.mdwn
+++ b/doc/transferring_data.mdwn
@@ -1,7 +1,8 @@
git-annex can transfer data to or from any of a repository's git remotes.
Depending on where the remote is, the data transfer is done using rsync
(over ssh, with automatic resume), or plain cp (with copy-on-write
-optimisations on supported filesystems).
+optimisations on supported filesystems). Some [[special_remotes]]
+are also supported that are not traditional git remotes.
It's equally easy to transfer a single file to or from a repository,
or to launch a retrievel of a massive pile of files from whatever
diff --git a/doc/use_case/Alice.mdwn b/doc/use_case/Alice.mdwn
index ccc0727bc..ee97efa43 100644
--- a/doc/use_case/Alice.mdwn
+++ b/doc/use_case/Alice.mdwn
@@ -2,9 +2,9 @@
Alice is always on the move, often with her trusty netbook and a small
handheld terabyte USB drive, or a smaller USB keydrive. She has a server
-out there on the net. All these things can have different files on them,
-but Alice no longer has to deal with the tedious process of keeping them
-manually in sync.
+out there on the net. She stores data in the Cloud. All these things can
+have different files on them, but Alice no longer has to deal with the
+tedious process of keeping them manually in sync.
When she has 1 bar on her cell, Alice queues up interesting files on her
server for later. At a coffee shop, she has git-annex download them to her
diff --git a/doc/walkthrough.mdwn b/doc/walkthrough.mdwn
index 3b4f7d56a..53f0be6bb 100644
--- a/doc/walkthrough.mdwn
+++ b/doc/walkthrough.mdwn
@@ -14,6 +14,7 @@ A walkthrough of the basic features of git-annex.
modifying_annexed_files
using_ssh_remotes
moving_file_content_between_repositories
+ using_Amazon_S3
using_the_URL_backend
using_the_SHA1_backend
migrating_data_to_a_new_backend
diff --git a/doc/walkthrough/moving_file_content_between_repositories.mdwn b/doc/walkthrough/moving_file_content_between_repositories.mdwn
index 6b3e3f4e8..27dffe913 100644
--- a/doc/walkthrough/moving_file_content_between_repositories.mdwn
+++ b/doc/walkthrough/moving_file_content_between_repositories.mdwn
@@ -6,8 +6,8 @@ server to your laptop. Doing that by hand (by using `git annex get` and
makes it very easy.
# git annex move my_cool_big_file --to usbdrive
- move my_cool_big_file (moving to usbdrive...) ok
+ move my_cool_big_file (to usbdrive...) ok
# git annex move video/hackity_hack_and_kaxxt.mov --from fileserver
- move video/hackity_hack_and_kaxxt.mov (moving from fileserver...)
+ move video/hackity_hack_and_kaxxt.mov (from fileserver...)
WORM-s86050597-m1274316523--hackity_hack_and_kax 100% 82MB 199.1KB/s 07:02
ok
diff --git a/doc/walkthrough/using_Amazon_S3.mdwn b/doc/walkthrough/using_Amazon_S3.mdwn
new file mode 100644
index 000000000..2833a9c5a
--- /dev/null
+++ b/doc/walkthrough/using_Amazon_S3.mdwn
@@ -0,0 +1,29 @@
+git-annex extends git's usual remotes with some [[special_remotes]], that
+are not git repositories. This way you can set up a remote using say,
+Amazon S3, and use git-annex to transfer files into the cloud.
+
+First, export your S3 credentials:
+
+ export ANNEX_S3_ACCESS_KEY_ID="08TJMT99S3511WOZEP91"
+ export ANNEX_S3_SECRET_ACCESS_KEY="s3kr1t"
+
+Next, create a bucket, giving it a name and a description:
+
+ git annex s3bucket mybucket unencrypted
+ s3bucket (creating mybucket...) (no encryption!) ok
+
+**Note that encrypted buckets are not (yet) supported. Data sent to S3
+is susceptible to snooping.**
+
+Finally, configure a git remote to use the bucket you created:
+
+ git config remote.mys3.annex-s3-bucket mybucket
+
+Now the remote can be used like any other remote.
+
+ # git annex copy my_cool_big_file --to mys3
+ copy my_cool_big_file (to mys3...) ok
+ # git annex move video/hackity_hack_and_kaxxt.mov --to mys3
+ move video/hackity_hack_and_kaxxt.mov (to mys3...) ok
+
+See [[special_remotes/Amazon_S3]] for details.