diff options
author | Joey Hess <joey@kitenet.net> | 2011-04-27 20:06:07 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2011-04-27 20:23:09 -0400 |
commit | e68f128a9bf46c8f4ebe51fcb3b6f63955cadd2e (patch) | |
tree | 2102052ed8289efa0e44b4f9269c423753d70983 | |
parent | 4381ac062fa7cec01476e84a7bd8e154efb8aacd (diff) |
rsync special remote
Fully tested and working, including resuming and encryption. (Though not
resuming when sending *with* encryption; gpg doesn't produce identical
output each time.)
Uses same layout as the directory special remote and the .git/annex/objects/
directory.
-rw-r--r-- | Content.hs | 12 | ||||
-rw-r--r-- | Remote.hs | 2 | ||||
-rw-r--r-- | Remote/Git.hs | 2 | ||||
-rw-r--r-- | Remote/Rsync.hs | 193 | ||||
-rw-r--r-- | Remote/S3real.hs | 20 | ||||
-rw-r--r-- | Utility.hs | 5 | ||||
-rw-r--r-- | debian/changelog | 2 | ||||
-rw-r--r-- | doc/forum/wishlist:_special_remote_for_sftp_or_rsync.mdwn | 22 | ||||
-rw-r--r-- | doc/special_remotes.mdwn | 1 | ||||
-rw-r--r-- | doc/special_remotes/directory.mdwn | 2 | ||||
-rw-r--r-- | doc/special_remotes/rsync.mdwn | 23 |
11 files changed, 265 insertions, 19 deletions
diff --git a/Content.hs b/Content.hs index dd0ea5ca1..99770f553 100644 --- a/Content.hs +++ b/Content.hs @@ -12,6 +12,7 @@ module Content ( logStatusFor, getViaTmp, getViaTmpUnchecked, + withTmp, checkDiskSpace, preventWrite, allowWrite, @@ -127,6 +128,17 @@ getViaTmpUnchecked key action = do -- to resume its transfer return False +{- Creates a temp file, runs an action on it, and cleans up the temp file. -} +withTmp :: Key -> (FilePath -> Annex a) -> Annex a +withTmp key action = do + g <- Annex.gitRepo + let tmp = gitAnnexTmpLocation g key + liftIO $ createDirectoryIfMissing True (parentDir tmp) + res <- action tmp + tmp_exists <- liftIO $ doesFileExist tmp + when tmp_exists $ liftIO $ removeFile tmp + return res + {- Checks that there is disk space available to store a given key, - throwing an error if not. -} checkDiskSpace :: Key -> Annex () @@ -48,6 +48,7 @@ import qualified Remote.Git import qualified Remote.S3 import qualified Remote.Bup import qualified Remote.Directory +import qualified Remote.Rsync remoteTypes :: [RemoteType Annex] remoteTypes = @@ -55,6 +56,7 @@ remoteTypes = , Remote.S3.remote , Remote.Bup.remote , Remote.Directory.remote + , Remote.Rsync.remote ] {- Builds a list of all available Remotes. diff --git a/Remote/Git.hs b/Remote/Git.hs index bab452a33..e6df6be46 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -158,7 +158,7 @@ rsynchelper :: Git.Repo -> Bool -> Key -> FilePath -> Annex (Bool) rsynchelper r sending key file = do showProgress -- make way for progress bar p <- rsyncParams r sending key file - res <- liftIO $ boolSystem "rsync" p + res <- liftIO $ rsync p if res then return res else do diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs new file mode 100644 index 000000000..0a62ff92f --- /dev/null +++ b/Remote/Rsync.hs @@ -0,0 +1,193 @@ +{- A remote that is only accessible by rsync. + - + - Copyright 2011 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Remote.Rsync (remote) where + +import qualified Data.ByteString.Lazy.Char8 as L +import Control.Exception.Extensible (IOException) +import qualified Data.Map as M +import Control.Monad.State (liftIO, when) +import System.FilePath +import System.Directory +import System.Posix.Files +import System.Posix.Process + +import RemoteClass +import Types +import qualified GitRepo as Git +import qualified Annex +import UUID +import Locations +import Config +import Content +import Utility +import Remote.Special +import Remote.Encryptable +import Crypto +import Messages +import RsyncFile + +type RsyncUrl = String + +data RsyncOpts = RsyncOpts { + rsyncUrl :: RsyncUrl, + rsyncOptions :: [CommandParam] +} + +remote :: RemoteType Annex +remote = RemoteType { + typename = "rsync", + enumerate = findSpecialRemotes "rsyncurl", + generate = gen, + setup = rsyncSetup +} + +gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex) +gen r u c = do + url <- getConfig r "rsyncurl" (error "missing rsyncurl") + opts <- getConfig r "rsync-options" "" + let o = RsyncOpts url $ map Param $ words opts + cst <- remoteCost r expensiveRemoteCost + return $ encryptableRemote c + (storeEncrypted o) + (retrieveEncrypted o) + Remote { + uuid = u, + cost = cst, + name = Git.repoDescribe r, + storeKey = store o, + retrieveKeyFile = retrieve o, + removeKey = remove o, + hasKey = checkPresent r o, + hasKeyCheap = True, + config = Nothing + } + +rsyncSetup :: UUID -> RemoteConfig -> Annex RemoteConfig +rsyncSetup u c = do + -- verify configuration is sane + let url = case M.lookup "rsyncurl" c of + Nothing -> error "Specify rsyncurl=" + Just d -> d + c' <- encryptionSetup c + + -- The rsyncurl is stored in git config, not only in this remote's + -- persistant state, so it can vary between hosts. + gitConfigSpecialRemote u c' "rsyncurl" url + return c' + +rsyncKey :: RsyncOpts -> Key -> String +rsyncKey o k = rsyncUrl o </> hashDirMixed k </> f </> f + where + f = keyFile k + +store :: RsyncOpts -> Key -> Annex Bool +store o k = do + g <- Annex.gitRepo + rsyncSend o k (gitAnnexLocation g k) + +storeEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> Annex Bool +storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do + g <- Annex.gitRepo + let f = gitAnnexLocation g k + liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s + rsyncSend o enck tmp + +retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool +retrieve o k f = rsyncRemote o + -- use inplace when retrieving to support resuming + [ Param "--inplace" + , Param $ rsyncKey o k + , Param f + ] + +retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool +retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do + res <- retrieve o enck tmp + if res + then liftIO $ catchBool $ do + withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f + return True + else return res + +remove :: RsyncOpts -> Key -> Annex Bool +remove o k = withRsyncScratchDir $ \tmp -> do + {- Send an empty directory to rysnc as the parent directory + - of the file to remove. -} + let dummy = tmp </> keyFile k + liftIO $ createDirectoryIfMissing True dummy + liftIO $ rsync $ rsyncOptions o ++ + [ Params "--delete --recursive" + , partialParams + , Param $ addTrailingPathSeparator dummy + , Param $ parentDir $ rsyncKey o k + ] + +checkPresent :: Git.Repo -> RsyncOpts -> Key -> Annex (Either IOException Bool) +checkPresent r o k = do + showNote ("checking " ++ Git.repoDescribe r ++ "...") + -- note: Does not currently differnetiate between rsync failing + -- to connect, and the file not being present. + res <- liftIO $ boolSystem "sh" [Param "-c", Param cmd] + return $ Right res + where + cmd = "rsync --quiet " ++ testfile ++ " 2>/dev/null" + testfile = shellEscape $ rsyncKey o k + +{- Rsync params to enable resumes of sending files safely, + - ensure that files are only moved into place once complete + -} +partialParams :: CommandParam +partialParams = Params "--no-inplace --partial --partial-dir=.rsync-partial" + +{- Runs an action in an empty scratch directory that can be used to build + - up trees for rsync. -} +withRsyncScratchDir :: (FilePath -> Annex Bool) -> Annex Bool +withRsyncScratchDir a = do + g <- Annex.gitRepo + pid <- liftIO $ getProcessID + let tmp = gitAnnexTmpDir g </> "rsynctmp" </> show pid + nuke tmp + liftIO $ createDirectoryIfMissing True $ tmp + res <- a tmp + nuke tmp + return res + where + nuke d = liftIO $ do + e <- doesDirectoryExist d + when e $ liftIO $ removeDirectoryRecursive d + +rsyncRemote :: RsyncOpts -> [CommandParam] -> Annex Bool +rsyncRemote o params = do + showProgress -- make way for progress bar + res <- liftIO $ rsync $ rsyncOptions o ++ defaultParams ++ params + if res + then return res + else do + showLongNote "rsync failed -- run git annex again to resume file transfer" + return res + where + defaultParams = [Params "--progress"] + +{- To send a single key is slightly tricky; need to build up a temporary + directory structure to pass to rsync so it can create the hash + directories. -} +rsyncSend :: RsyncOpts -> Key -> FilePath -> Annex Bool +rsyncSend o k src = withRsyncScratchDir $ \tmp -> do + let dest = tmp </> hashDirMixed k </> f </> f + liftIO $ createDirectoryIfMissing True $ parentDir $ dest + liftIO $ createLink src dest + res <- rsyncRemote o + [ Param "--recursive" + , partialParams + -- tmp/ to send contents of tmp dir + , Param $ addTrailingPathSeparator tmp + , Param $ rsyncUrl o + ] + return res + where + f = keyFile k diff --git a/Remote/S3real.hs b/Remote/S3real.hs index 07e33368e..2e198f79d 100644 --- a/Remote/S3real.hs +++ b/Remote/S3real.hs @@ -19,7 +19,6 @@ import Control.Monad (when) import Control.Monad.State (liftIO) import System.Environment import System.Posix.Files -import System.Directory import RemoteClass import Types @@ -33,7 +32,7 @@ import Remote.Special import Remote.Encryptable import Crypto import Key -import Utility +import Content remote :: RemoteType Annex remote = RemoteType { @@ -108,18 +107,15 @@ store r k = s3Action r False $ \(conn, bucket) -> do s3Bool res storeEncrypted :: Remote Annex -> (Cipher, Key) -> Key -> Annex Bool -storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) -> do - g <- Annex.gitRepo - let f = gitAnnexLocation g k +storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) -> -- To get file size of the encrypted content, have to use a temp file. -- (An alternative would be chunking to to a constant size.) - let tmp = gitAnnexTmpLocation g enck - liftIO $ createDirectoryIfMissing True (parentDir tmp) - liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s - res <- liftIO $ storeHelper (conn, bucket) r enck tmp - tmp_exists <- liftIO $ doesFileExist tmp - when tmp_exists $ liftIO $ removeFile tmp - s3Bool res + withTmp enck $ \tmp -> do + g <- Annex.gitRepo + let f = gitAnnexLocation g k + liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s + res <- liftIO $ storeHelper (conn, bucket) r enck tmp + s3Bool res storeHelper :: (AWSConnection, String) -> Remote Annex -> Key -> FilePath -> IO (AWSResult ()) storeHelper (conn, bucket) r k file = do diff --git a/Utility.hs b/Utility.hs index 13ebbfccb..51bbc17a3 100644 --- a/Utility.hs +++ b/Utility.hs @@ -95,8 +95,9 @@ boolSystem command params = do restoresignals oldint oldset executeFile command True (toCommand params) Nothing -{- Escapes a filename to be safely able to be exposed to the shell. -} -shellEscape :: FilePath -> String +{- Escapes a filename or other parameter to be safely able to be exposed to + - the shell. -} +shellEscape :: String -> String shellEscape f = "'" ++ escaped ++ "'" where -- replace ' with '"'"' diff --git a/debian/changelog b/debian/changelog index ce5e651b0..78f65e8b9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -2,6 +2,8 @@ git-annex (0.20110426) UNRELEASED; urgency=low * Switch back to haskell SHA library, so git-annex remains buildable on Debian stable. + * Added rsync special remotes. This could be used, for example, to + store annexed content on rsync.net, encrypted naturally. Or anywhere else. -- Joey Hess <joeyh@debian.org> Tue, 26 Apr 2011 11:23:54 -0400 diff --git a/doc/forum/wishlist:_special_remote_for_sftp_or_rsync.mdwn b/doc/forum/wishlist:_special_remote_for_sftp_or_rsync.mdwn index becfe23c7..9807bf91e 100644 --- a/doc/forum/wishlist:_special_remote_for_sftp_or_rsync.mdwn +++ b/doc/forum/wishlist:_special_remote_for_sftp_or_rsync.mdwn @@ -1,5 +1,21 @@ -i think it would be useful to have a fourth kind of [[special remote]]s that connects to a dumb storage using sftp or rsync. this can be emulated by using sshfs, but that means lots of round-trips through the system and is limited to platforms where sshfs is available. +i think it would be useful to have a fourth kind of [[special remote]]s +that connects to a dumb storage using sftp or rsync. this can be emulated +by using sshfs, but that means lots of round-trips through the system and +is limited to platforms where sshfs is available. -typical use cases are backups to storate shared between a group of people where each user only has limited access (sftp or rsync), when using [[bup]] is not an option. +typical use cases are backups to storate shared between a group of people +where each user only has limited access (sftp or rsync), when using [[bup]] +is not an option. -an alternative to implementing yet another special remote would be to have some kind of plugin system by which external programs can provide an interface to key-value stores (i'd implement the sftp backend myself, but haven't learned haskell yet). +an alternative to implementing yet another special remote would be to have +some kind of plugin system by which external programs can provide an +interface to key-value stores (i'd implement the sftp backend myself, but +haven't learned haskell yet). + +> Ask and ye [[shall receive|special_remotes/rsync]]. +> +> Sometimes I almost think that a generic configurable special remote that +> just uses configured shell commands would be useful.. But there's really +> no comparison with sitting down and writing code tuned to work with +> a given transport like rsync, when it comes to reliability and taking +> advantage of its abilities (like resuming). --[[Joey]] diff --git a/doc/special_remotes.mdwn b/doc/special_remotes.mdwn index a33d3f612..210f995d2 100644 --- a/doc/special_remotes.mdwn +++ b/doc/special_remotes.mdwn @@ -9,6 +9,7 @@ They cannot be used by other git commands though. * [[Amazon_S3]] * [[bup]] * [[directory]] +* [[rsync]] ## Unused content on special remotes diff --git a/doc/special_remotes/directory.mdwn b/doc/special_remotes/directory.mdwn index 8006c44fc..9e4bfa33b 100644 --- a/doc/special_remotes/directory.mdwn +++ b/doc/special_remotes/directory.mdwn @@ -7,4 +7,4 @@ the drive's mountpoint as a directory remote. Setup example: - # git annex initremote usbdrive directory=/media/usbdrive/ encryption=none + # git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none diff --git a/doc/special_remotes/rsync.mdwn b/doc/special_remotes/rsync.mdwn new file mode 100644 index 000000000..593644291 --- /dev/null +++ b/doc/special_remotes/rsync.mdwn @@ -0,0 +1,23 @@ +This special remote type rsyncs file contents to somewhere else. + +Setup example: + + # git annex initremote myrsync type=rsync rsyncurl=rsync://rsync.example.com/myrsync encryption=joey@kitenet.net + +## configuration + +These parameters can be passed to `git annex initremote` to configure rsync: + +* `encryption` - Required. Either "none" to disable encryption of content + stored in rsync, + or a value that can be looked up (using gpg -k) to find a gpg encryption + key that will be given access to the remote. Note that additional gpg + keys can be given access to a remote by rerunning initremote with + the new key id. See [[encryption]]. + +* `rsyncurl` - Required. This is the url or `hostname:/directory` to + pass to rsync to tell it where to store content. + +The `annex-rsync-options` git configuration setting can be used to pass +parameters to rsync. Note that it is **not safe** to put "--delete" +in `annex-rsync-options` when using rsync special remotes. |