summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-04-27 20:06:07 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-04-27 20:23:09 -0400
commite68f128a9bf46c8f4ebe51fcb3b6f63955cadd2e (patch)
tree2102052ed8289efa0e44b4f9269c423753d70983
parent4381ac062fa7cec01476e84a7bd8e154efb8aacd (diff)
rsync special remote
Fully tested and working, including resuming and encryption. (Though not resuming when sending *with* encryption; gpg doesn't produce identical output each time.) Uses same layout as the directory special remote and the .git/annex/objects/ directory.
-rw-r--r--Content.hs12
-rw-r--r--Remote.hs2
-rw-r--r--Remote/Git.hs2
-rw-r--r--Remote/Rsync.hs193
-rw-r--r--Remote/S3real.hs20
-rw-r--r--Utility.hs5
-rw-r--r--debian/changelog2
-rw-r--r--doc/forum/wishlist:_special_remote_for_sftp_or_rsync.mdwn22
-rw-r--r--doc/special_remotes.mdwn1
-rw-r--r--doc/special_remotes/directory.mdwn2
-rw-r--r--doc/special_remotes/rsync.mdwn23
11 files changed, 265 insertions, 19 deletions
diff --git a/Content.hs b/Content.hs
index dd0ea5ca1..99770f553 100644
--- a/Content.hs
+++ b/Content.hs
@@ -12,6 +12,7 @@ module Content (
logStatusFor,
getViaTmp,
getViaTmpUnchecked,
+ withTmp,
checkDiskSpace,
preventWrite,
allowWrite,
@@ -127,6 +128,17 @@ getViaTmpUnchecked key action = do
-- to resume its transfer
return False
+{- Creates a temp file, runs an action on it, and cleans up the temp file. -}
+withTmp :: Key -> (FilePath -> Annex a) -> Annex a
+withTmp key action = do
+ g <- Annex.gitRepo
+ let tmp = gitAnnexTmpLocation g key
+ liftIO $ createDirectoryIfMissing True (parentDir tmp)
+ res <- action tmp
+ tmp_exists <- liftIO $ doesFileExist tmp
+ when tmp_exists $ liftIO $ removeFile tmp
+ return res
+
{- Checks that there is disk space available to store a given key,
- throwing an error if not. -}
checkDiskSpace :: Key -> Annex ()
diff --git a/Remote.hs b/Remote.hs
index 8d2ab0399..f47bea560 100644
--- a/Remote.hs
+++ b/Remote.hs
@@ -48,6 +48,7 @@ import qualified Remote.Git
import qualified Remote.S3
import qualified Remote.Bup
import qualified Remote.Directory
+import qualified Remote.Rsync
remoteTypes :: [RemoteType Annex]
remoteTypes =
@@ -55,6 +56,7 @@ remoteTypes =
, Remote.S3.remote
, Remote.Bup.remote
, Remote.Directory.remote
+ , Remote.Rsync.remote
]
{- Builds a list of all available Remotes.
diff --git a/Remote/Git.hs b/Remote/Git.hs
index bab452a33..e6df6be46 100644
--- a/Remote/Git.hs
+++ b/Remote/Git.hs
@@ -158,7 +158,7 @@ rsynchelper :: Git.Repo -> Bool -> Key -> FilePath -> Annex (Bool)
rsynchelper r sending key file = do
showProgress -- make way for progress bar
p <- rsyncParams r sending key file
- res <- liftIO $ boolSystem "rsync" p
+ res <- liftIO $ rsync p
if res
then return res
else do
diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs
new file mode 100644
index 000000000..0a62ff92f
--- /dev/null
+++ b/Remote/Rsync.hs
@@ -0,0 +1,193 @@
+{- A remote that is only accessible by rsync.
+ -
+ - Copyright 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Remote.Rsync (remote) where
+
+import qualified Data.ByteString.Lazy.Char8 as L
+import Control.Exception.Extensible (IOException)
+import qualified Data.Map as M
+import Control.Monad.State (liftIO, when)
+import System.FilePath
+import System.Directory
+import System.Posix.Files
+import System.Posix.Process
+
+import RemoteClass
+import Types
+import qualified GitRepo as Git
+import qualified Annex
+import UUID
+import Locations
+import Config
+import Content
+import Utility
+import Remote.Special
+import Remote.Encryptable
+import Crypto
+import Messages
+import RsyncFile
+
+type RsyncUrl = String
+
+data RsyncOpts = RsyncOpts {
+ rsyncUrl :: RsyncUrl,
+ rsyncOptions :: [CommandParam]
+}
+
+remote :: RemoteType Annex
+remote = RemoteType {
+ typename = "rsync",
+ enumerate = findSpecialRemotes "rsyncurl",
+ generate = gen,
+ setup = rsyncSetup
+}
+
+gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex)
+gen r u c = do
+ url <- getConfig r "rsyncurl" (error "missing rsyncurl")
+ opts <- getConfig r "rsync-options" ""
+ let o = RsyncOpts url $ map Param $ words opts
+ cst <- remoteCost r expensiveRemoteCost
+ return $ encryptableRemote c
+ (storeEncrypted o)
+ (retrieveEncrypted o)
+ Remote {
+ uuid = u,
+ cost = cst,
+ name = Git.repoDescribe r,
+ storeKey = store o,
+ retrieveKeyFile = retrieve o,
+ removeKey = remove o,
+ hasKey = checkPresent r o,
+ hasKeyCheap = True,
+ config = Nothing
+ }
+
+rsyncSetup :: UUID -> RemoteConfig -> Annex RemoteConfig
+rsyncSetup u c = do
+ -- verify configuration is sane
+ let url = case M.lookup "rsyncurl" c of
+ Nothing -> error "Specify rsyncurl="
+ Just d -> d
+ c' <- encryptionSetup c
+
+ -- The rsyncurl is stored in git config, not only in this remote's
+ -- persistant state, so it can vary between hosts.
+ gitConfigSpecialRemote u c' "rsyncurl" url
+ return c'
+
+rsyncKey :: RsyncOpts -> Key -> String
+rsyncKey o k = rsyncUrl o </> hashDirMixed k </> f </> f
+ where
+ f = keyFile k
+
+store :: RsyncOpts -> Key -> Annex Bool
+store o k = do
+ g <- Annex.gitRepo
+ rsyncSend o k (gitAnnexLocation g k)
+
+storeEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> Annex Bool
+storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do
+ g <- Annex.gitRepo
+ let f = gitAnnexLocation g k
+ liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s
+ rsyncSend o enck tmp
+
+retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool
+retrieve o k f = rsyncRemote o
+ -- use inplace when retrieving to support resuming
+ [ Param "--inplace"
+ , Param $ rsyncKey o k
+ , Param f
+ ]
+
+retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool
+retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do
+ res <- retrieve o enck tmp
+ if res
+ then liftIO $ catchBool $ do
+ withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f
+ return True
+ else return res
+
+remove :: RsyncOpts -> Key -> Annex Bool
+remove o k = withRsyncScratchDir $ \tmp -> do
+ {- Send an empty directory to rysnc as the parent directory
+ - of the file to remove. -}
+ let dummy = tmp </> keyFile k
+ liftIO $ createDirectoryIfMissing True dummy
+ liftIO $ rsync $ rsyncOptions o ++
+ [ Params "--delete --recursive"
+ , partialParams
+ , Param $ addTrailingPathSeparator dummy
+ , Param $ parentDir $ rsyncKey o k
+ ]
+
+checkPresent :: Git.Repo -> RsyncOpts -> Key -> Annex (Either IOException Bool)
+checkPresent r o k = do
+ showNote ("checking " ++ Git.repoDescribe r ++ "...")
+ -- note: Does not currently differnetiate between rsync failing
+ -- to connect, and the file not being present.
+ res <- liftIO $ boolSystem "sh" [Param "-c", Param cmd]
+ return $ Right res
+ where
+ cmd = "rsync --quiet " ++ testfile ++ " 2>/dev/null"
+ testfile = shellEscape $ rsyncKey o k
+
+{- Rsync params to enable resumes of sending files safely,
+ - ensure that files are only moved into place once complete
+ -}
+partialParams :: CommandParam
+partialParams = Params "--no-inplace --partial --partial-dir=.rsync-partial"
+
+{- Runs an action in an empty scratch directory that can be used to build
+ - up trees for rsync. -}
+withRsyncScratchDir :: (FilePath -> Annex Bool) -> Annex Bool
+withRsyncScratchDir a = do
+ g <- Annex.gitRepo
+ pid <- liftIO $ getProcessID
+ let tmp = gitAnnexTmpDir g </> "rsynctmp" </> show pid
+ nuke tmp
+ liftIO $ createDirectoryIfMissing True $ tmp
+ res <- a tmp
+ nuke tmp
+ return res
+ where
+ nuke d = liftIO $ do
+ e <- doesDirectoryExist d
+ when e $ liftIO $ removeDirectoryRecursive d
+
+rsyncRemote :: RsyncOpts -> [CommandParam] -> Annex Bool
+rsyncRemote o params = do
+ showProgress -- make way for progress bar
+ res <- liftIO $ rsync $ rsyncOptions o ++ defaultParams ++ params
+ if res
+ then return res
+ else do
+ showLongNote "rsync failed -- run git annex again to resume file transfer"
+ return res
+ where
+ defaultParams = [Params "--progress"]
+
+{- To send a single key is slightly tricky; need to build up a temporary
+ directory structure to pass to rsync so it can create the hash
+ directories. -}
+rsyncSend :: RsyncOpts -> Key -> FilePath -> Annex Bool
+rsyncSend o k src = withRsyncScratchDir $ \tmp -> do
+ let dest = tmp </> hashDirMixed k </> f </> f
+ liftIO $ createDirectoryIfMissing True $ parentDir $ dest
+ liftIO $ createLink src dest
+ res <- rsyncRemote o
+ [ Param "--recursive"
+ , partialParams
+ -- tmp/ to send contents of tmp dir
+ , Param $ addTrailingPathSeparator tmp
+ , Param $ rsyncUrl o
+ ]
+ return res
+ where
+ f = keyFile k
diff --git a/Remote/S3real.hs b/Remote/S3real.hs
index 07e33368e..2e198f79d 100644
--- a/Remote/S3real.hs
+++ b/Remote/S3real.hs
@@ -19,7 +19,6 @@ import Control.Monad (when)
import Control.Monad.State (liftIO)
import System.Environment
import System.Posix.Files
-import System.Directory
import RemoteClass
import Types
@@ -33,7 +32,7 @@ import Remote.Special
import Remote.Encryptable
import Crypto
import Key
-import Utility
+import Content
remote :: RemoteType Annex
remote = RemoteType {
@@ -108,18 +107,15 @@ store r k = s3Action r False $ \(conn, bucket) -> do
s3Bool res
storeEncrypted :: Remote Annex -> (Cipher, Key) -> Key -> Annex Bool
-storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) -> do
- g <- Annex.gitRepo
- let f = gitAnnexLocation g k
+storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) ->
-- To get file size of the encrypted content, have to use a temp file.
-- (An alternative would be chunking to to a constant size.)
- let tmp = gitAnnexTmpLocation g enck
- liftIO $ createDirectoryIfMissing True (parentDir tmp)
- liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s
- res <- liftIO $ storeHelper (conn, bucket) r enck tmp
- tmp_exists <- liftIO $ doesFileExist tmp
- when tmp_exists $ liftIO $ removeFile tmp
- s3Bool res
+ withTmp enck $ \tmp -> do
+ g <- Annex.gitRepo
+ let f = gitAnnexLocation g k
+ liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s
+ res <- liftIO $ storeHelper (conn, bucket) r enck tmp
+ s3Bool res
storeHelper :: (AWSConnection, String) -> Remote Annex -> Key -> FilePath -> IO (AWSResult ())
storeHelper (conn, bucket) r k file = do
diff --git a/Utility.hs b/Utility.hs
index 13ebbfccb..51bbc17a3 100644
--- a/Utility.hs
+++ b/Utility.hs
@@ -95,8 +95,9 @@ boolSystem command params = do
restoresignals oldint oldset
executeFile command True (toCommand params) Nothing
-{- Escapes a filename to be safely able to be exposed to the shell. -}
-shellEscape :: FilePath -> String
+{- Escapes a filename or other parameter to be safely able to be exposed to
+ - the shell. -}
+shellEscape :: String -> String
shellEscape f = "'" ++ escaped ++ "'"
where
-- replace ' with '"'"'
diff --git a/debian/changelog b/debian/changelog
index ce5e651b0..78f65e8b9 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -2,6 +2,8 @@ git-annex (0.20110426) UNRELEASED; urgency=low
* Switch back to haskell SHA library, so git-annex remains buildable on
Debian stable.
+ * Added rsync special remotes. This could be used, for example, to
+ store annexed content on rsync.net, encrypted naturally. Or anywhere else.
-- Joey Hess <joeyh@debian.org> Tue, 26 Apr 2011 11:23:54 -0400
diff --git a/doc/forum/wishlist:_special_remote_for_sftp_or_rsync.mdwn b/doc/forum/wishlist:_special_remote_for_sftp_or_rsync.mdwn
index becfe23c7..9807bf91e 100644
--- a/doc/forum/wishlist:_special_remote_for_sftp_or_rsync.mdwn
+++ b/doc/forum/wishlist:_special_remote_for_sftp_or_rsync.mdwn
@@ -1,5 +1,21 @@
-i think it would be useful to have a fourth kind of [[special remote]]s that connects to a dumb storage using sftp or rsync. this can be emulated by using sshfs, but that means lots of round-trips through the system and is limited to platforms where sshfs is available.
+i think it would be useful to have a fourth kind of [[special remote]]s
+that connects to a dumb storage using sftp or rsync. this can be emulated
+by using sshfs, but that means lots of round-trips through the system and
+is limited to platforms where sshfs is available.
-typical use cases are backups to storate shared between a group of people where each user only has limited access (sftp or rsync), when using [[bup]] is not an option.
+typical use cases are backups to storate shared between a group of people
+where each user only has limited access (sftp or rsync), when using [[bup]]
+is not an option.
-an alternative to implementing yet another special remote would be to have some kind of plugin system by which external programs can provide an interface to key-value stores (i'd implement the sftp backend myself, but haven't learned haskell yet).
+an alternative to implementing yet another special remote would be to have
+some kind of plugin system by which external programs can provide an
+interface to key-value stores (i'd implement the sftp backend myself, but
+haven't learned haskell yet).
+
+> Ask and ye [[shall receive|special_remotes/rsync]].
+>
+> Sometimes I almost think that a generic configurable special remote that
+> just uses configured shell commands would be useful.. But there's really
+> no comparison with sitting down and writing code tuned to work with
+> a given transport like rsync, when it comes to reliability and taking
+> advantage of its abilities (like resuming). --[[Joey]]
diff --git a/doc/special_remotes.mdwn b/doc/special_remotes.mdwn
index a33d3f612..210f995d2 100644
--- a/doc/special_remotes.mdwn
+++ b/doc/special_remotes.mdwn
@@ -9,6 +9,7 @@ They cannot be used by other git commands though.
* [[Amazon_S3]]
* [[bup]]
* [[directory]]
+* [[rsync]]
## Unused content on special remotes
diff --git a/doc/special_remotes/directory.mdwn b/doc/special_remotes/directory.mdwn
index 8006c44fc..9e4bfa33b 100644
--- a/doc/special_remotes/directory.mdwn
+++ b/doc/special_remotes/directory.mdwn
@@ -7,4 +7,4 @@ the drive's mountpoint as a directory remote.
Setup example:
- # git annex initremote usbdrive directory=/media/usbdrive/ encryption=none
+ # git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none
diff --git a/doc/special_remotes/rsync.mdwn b/doc/special_remotes/rsync.mdwn
new file mode 100644
index 000000000..593644291
--- /dev/null
+++ b/doc/special_remotes/rsync.mdwn
@@ -0,0 +1,23 @@
+This special remote type rsyncs file contents to somewhere else.
+
+Setup example:
+
+ # git annex initremote myrsync type=rsync rsyncurl=rsync://rsync.example.com/myrsync encryption=joey@kitenet.net
+
+## configuration
+
+These parameters can be passed to `git annex initremote` to configure rsync:
+
+* `encryption` - Required. Either "none" to disable encryption of content
+ stored in rsync,
+ or a value that can be looked up (using gpg -k) to find a gpg encryption
+ key that will be given access to the remote. Note that additional gpg
+ keys can be given access to a remote by rerunning initremote with
+ the new key id. See [[encryption]].
+
+* `rsyncurl` - Required. This is the url or `hostname:/directory` to
+ pass to rsync to tell it where to store content.
+
+The `annex-rsync-options` git configuration setting can be used to pass
+parameters to rsync. Note that it is **not safe** to put "--delete"
+in `annex-rsync-options` when using rsync special remotes.