From e68f128a9bf46c8f4ebe51fcb3b6f63955cadd2e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 27 Apr 2011 20:06:07 -0400 Subject: rsync special remote Fully tested and working, including resuming and encryption. (Though not resuming when sending *with* encryption; gpg doesn't produce identical output each time.) Uses same layout as the directory special remote and the .git/annex/objects/ directory. --- Remote/Git.hs | 2 +- Remote/Rsync.hs | 193 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ Remote/S3real.hs | 20 +++--- 3 files changed, 202 insertions(+), 13 deletions(-) create mode 100644 Remote/Rsync.hs (limited to 'Remote') diff --git a/Remote/Git.hs b/Remote/Git.hs index bab452a33..e6df6be46 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -158,7 +158,7 @@ rsynchelper :: Git.Repo -> Bool -> Key -> FilePath -> Annex (Bool) rsynchelper r sending key file = do showProgress -- make way for progress bar p <- rsyncParams r sending key file - res <- liftIO $ boolSystem "rsync" p + res <- liftIO $ rsync p if res then return res else do diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs new file mode 100644 index 000000000..0a62ff92f --- /dev/null +++ b/Remote/Rsync.hs @@ -0,0 +1,193 @@ +{- A remote that is only accessible by rsync. + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Remote.Rsync (remote) where + +import qualified Data.ByteString.Lazy.Char8 as L +import Control.Exception.Extensible (IOException) +import qualified Data.Map as M +import Control.Monad.State (liftIO, when) +import System.FilePath +import System.Directory +import System.Posix.Files +import System.Posix.Process + +import RemoteClass +import Types +import qualified GitRepo as Git +import qualified Annex +import UUID +import Locations +import Config +import Content +import Utility +import Remote.Special +import Remote.Encryptable +import Crypto +import Messages +import RsyncFile + +type RsyncUrl = String + +data RsyncOpts = RsyncOpts { + rsyncUrl :: RsyncUrl, + rsyncOptions :: [CommandParam] +} + +remote :: RemoteType Annex +remote = RemoteType { + typename = "rsync", + enumerate = findSpecialRemotes "rsyncurl", + generate = gen, + setup = rsyncSetup +} + +gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex) +gen r u c = do + url <- getConfig r "rsyncurl" (error "missing rsyncurl") + opts <- getConfig r "rsync-options" "" + let o = RsyncOpts url $ map Param $ words opts + cst <- remoteCost r expensiveRemoteCost + return $ encryptableRemote c + (storeEncrypted o) + (retrieveEncrypted o) + Remote { + uuid = u, + cost = cst, + name = Git.repoDescribe r, + storeKey = store o, + retrieveKeyFile = retrieve o, + removeKey = remove o, + hasKey = checkPresent r o, + hasKeyCheap = True, + config = Nothing + } + +rsyncSetup :: UUID -> RemoteConfig -> Annex RemoteConfig +rsyncSetup u c = do + -- verify configuration is sane + let url = case M.lookup "rsyncurl" c of + Nothing -> error "Specify rsyncurl=" + Just d -> d + c' <- encryptionSetup c + + -- The rsyncurl is stored in git config, not only in this remote's + -- persistant state, so it can vary between hosts. + gitConfigSpecialRemote u c' "rsyncurl" url + return c' + +rsyncKey :: RsyncOpts -> Key -> String +rsyncKey o k = rsyncUrl o hashDirMixed k f f + where + f = keyFile k + +store :: RsyncOpts -> Key -> Annex Bool +store o k = do + g <- Annex.gitRepo + rsyncSend o k (gitAnnexLocation g k) + +storeEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> Annex Bool +storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do + g <- Annex.gitRepo + let f = gitAnnexLocation g k + liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s + rsyncSend o enck tmp + +retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool +retrieve o k f = rsyncRemote o + -- use inplace when retrieving to support resuming + [ Param "--inplace" + , Param $ rsyncKey o k + , Param f + ] + +retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool +retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do + res <- retrieve o enck tmp + if res + then liftIO $ catchBool $ do + withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f + return True + else return res + +remove :: RsyncOpts -> Key -> Annex Bool +remove o k = withRsyncScratchDir $ \tmp -> do + {- Send an empty directory to rysnc as the parent directory + - of the file to remove. -} + let dummy = tmp keyFile k + liftIO $ createDirectoryIfMissing True dummy + liftIO $ rsync $ rsyncOptions o ++ + [ Params "--delete --recursive" + , partialParams + , Param $ addTrailingPathSeparator dummy + , Param $ parentDir $ rsyncKey o k + ] + +checkPresent :: Git.Repo -> RsyncOpts -> Key -> Annex (Either IOException Bool) +checkPresent r o k = do + showNote ("checking " ++ Git.repoDescribe r ++ "...") + -- note: Does not currently differnetiate between rsync failing + -- to connect, and the file not being present. + res <- liftIO $ boolSystem "sh" [Param "-c", Param cmd] + return $ Right res + where + cmd = "rsync --quiet " ++ testfile ++ " 2>/dev/null" + testfile = shellEscape $ rsyncKey o k + +{- Rsync params to enable resumes of sending files safely, + - ensure that files are only moved into place once complete + -} +partialParams :: CommandParam +partialParams = Params "--no-inplace --partial --partial-dir=.rsync-partial" + +{- Runs an action in an empty scratch directory that can be used to build + - up trees for rsync. -} +withRsyncScratchDir :: (FilePath -> Annex Bool) -> Annex Bool +withRsyncScratchDir a = do + g <- Annex.gitRepo + pid <- liftIO $ getProcessID + let tmp = gitAnnexTmpDir g "rsynctmp" show pid + nuke tmp + liftIO $ createDirectoryIfMissing True $ tmp + res <- a tmp + nuke tmp + return res + where + nuke d = liftIO $ do + e <- doesDirectoryExist d + when e $ liftIO $ removeDirectoryRecursive d + +rsyncRemote :: RsyncOpts -> [CommandParam] -> Annex Bool +rsyncRemote o params = do + showProgress -- make way for progress bar + res <- liftIO $ rsync $ rsyncOptions o ++ defaultParams ++ params + if res + then return res + else do + showLongNote "rsync failed -- run git annex again to resume file transfer" + return res + where + defaultParams = [Params "--progress"] + +{- To send a single key is slightly tricky; need to build up a temporary + directory structure to pass to rsync so it can create the hash + directories. -} +rsyncSend :: RsyncOpts -> Key -> FilePath -> Annex Bool +rsyncSend o k src = withRsyncScratchDir $ \tmp -> do + let dest = tmp hashDirMixed k f f + liftIO $ createDirectoryIfMissing True $ parentDir $ dest + liftIO $ createLink src dest + res <- rsyncRemote o + [ Param "--recursive" + , partialParams + -- tmp/ to send contents of tmp dir + , Param $ addTrailingPathSeparator tmp + , Param $ rsyncUrl o + ] + return res + where + f = keyFile k diff --git a/Remote/S3real.hs b/Remote/S3real.hs index 07e33368e..2e198f79d 100644 --- a/Remote/S3real.hs +++ b/Remote/S3real.hs @@ -19,7 +19,6 @@ import Control.Monad (when) import Control.Monad.State (liftIO) import System.Environment import System.Posix.Files -import System.Directory import RemoteClass import Types @@ -33,7 +32,7 @@ import Remote.Special import Remote.Encryptable import Crypto import Key -import Utility +import Content remote :: RemoteType Annex remote = RemoteType { @@ -108,18 +107,15 @@ store r k = s3Action r False $ \(conn, bucket) -> do s3Bool res storeEncrypted :: Remote Annex -> (Cipher, Key) -> Key -> Annex Bool -storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) -> do - g <- Annex.gitRepo - let f = gitAnnexLocation g k +storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) -> -- To get file size of the encrypted content, have to use a temp file. -- (An alternative would be chunking to to a constant size.) - let tmp = gitAnnexTmpLocation g enck - liftIO $ createDirectoryIfMissing True (parentDir tmp) - liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s - res <- liftIO $ storeHelper (conn, bucket) r enck tmp - tmp_exists <- liftIO $ doesFileExist tmp - when tmp_exists $ liftIO $ removeFile tmp - s3Bool res + withTmp enck $ \tmp -> do + g <- Annex.gitRepo + let f = gitAnnexLocation g k + liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s + res <- liftIO $ storeHelper (conn, bucket) r enck tmp + s3Bool res storeHelper :: (AWSConnection, String) -> Remote Annex -> Key -> FilePath -> IO (AWSResult ()) storeHelper (conn, bucket) r k file = do -- cgit v1.2.3