From 3c2887bcfdacd2676386e2e97c29e22c7570cd67 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 28 Feb 2018 12:09:03 -0400 Subject: Support exporttree=yes for rsync special remotes. Renaming is not supported; it might be possible to use --fuzzy to get rsync to notice the file is being renamed, but that is a bit ..fuzzy. On the other hand, interrupted transfers of an exported file are resumed, since rsync is great at that. Had to adjust the exporttree docs, which said interrupted transfers would restart. Note that remove no longer makes the empty directory dummy, instead sending the top-level empty directory. This works just as well and I noticed the dummy was unnecessary when refactoring it into removeGeneric. Verified that behavior of remove is not changed, and git annex testremote does pass. This commit was sponsored by Brock Spratlen on Patreon. --- CHANGELOG | 6 + Remote/Rsync.hs | 128 +++++++++++++++------ Remote/Rsync/RsyncUrl.hs | 5 +- doc/git-annex-export.mdwn | 2 +- doc/special_remotes/rsync.mdwn | 4 + ...ent_8_3b93389a1f50aad0f759d361f06200e9._comment | 9 ++ 6 files changed, 117 insertions(+), 37 deletions(-) create mode 100644 doc/todo/export/comment_8_3b93389a1f50aad0f759d361f06200e9._comment diff --git a/CHANGELOG b/CHANGELOG index 861db442b..83692a0e9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,9 @@ +git-annex (6.20180228) UNRELEASED; urgency=medium + + * Support exporttree=yes for rsync special remotes. + + -- Joey Hess Wed, 28 Feb 2018 11:53:03 -0400 + git-annex (6.20180227) upstream; urgency=medium * inprogress: Avoid showing failures for files not in progress. diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index dfac61542..7f687a7e2 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -1,6 +1,6 @@ {- A remote that is only accessible by rsync. - - - Copyright 2011 Joey Hess + - Copyright 2011-2018 Joey Hess - - Licensed under the GNU GPL version 3 or higher. 
-} @@ -29,6 +29,7 @@ import Annex.Ssh import Remote.Helper.Special import Remote.Helper.Messages import Remote.Helper.Export +import Types.Export import Remote.Rsync.RsyncUrl import Crypto import Utility.Rsync @@ -49,7 +50,7 @@ remote = RemoteType , enumerate = const (findSpecialRemotes "rsyncurl") , generate = gen , setup = rsyncSetup - , exportSupported = exportUnsupported + , exportSupported = exportIsSupported } gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) @@ -75,7 +76,14 @@ gen r u c gc = do , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = False - , exportActions = exportUnsupported + , exportActions = return $ ExportActions + { storeExport = storeExportM o + , retrieveExport = retrieveExportM o + , removeExport = removeExportM o + , checkPresentExport = checkPresentExportM o + , removeExportDirectory = Just (removeExportDirectoryM o) + , renameExport = renameExportM o + } , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing @@ -165,14 +173,25 @@ rsyncSetup _ mu _ c gc = do - pass --include=X --include=X/Y --include=X/Y/file --exclude=*) -} store :: RsyncOpts -> Key -> FilePath -> MeterUpdate -> Annex Bool -store o k src meterupdate = withRsyncScratchDir $ \tmp -> do - let dest = tmp </> Prelude.head (keyPaths k) - liftIO $ createDirectoryIfMissing True $ parentDir dest - ok <- liftIO $ if canrename +store o k src meterupdate = storeGeneric o meterupdate basedest populatedest + where + basedest = Prelude.head (keyPaths k) + populatedest dest = liftIO $ if canrename then do rename src dest return True else createLinkOrCopy src dest + {- If the key being sent is encrypted or chunked, the file + - containing its content is a temp file, and so can be + - renamed into place. Otherwise, the file is the annexed + - object file, and has to be copied or hard linked into place. 
-} + canrename = isEncKey k || isChunkKey k + +storeGeneric :: RsyncOpts -> MeterUpdate -> FilePath -> (FilePath -> Annex Bool) -> Annex Bool +storeGeneric o meterupdate basedest populatedest = withRsyncScratchDir $ \tmp -> do + let dest = tmp </> basedest + liftIO $ createDirectoryIfMissing True $ parentDir dest + ok <- populatedest dest ps <- sendParams if ok then showResumable $ rsyncRemote Upload o (Just meterupdate) $ ps ++ @@ -182,61 +201,97 @@ store o k src meterupdate = withRsyncScratchDir $ \tmp -> do , Param $ rsyncUrl o ] else return False - where - {- If the key being sent is encrypted or chunked, the file - - containing its content is a temp file, and so can be - - renamed into place. Otherwise, the file is the annexed - - object file, and has to be copied or hard linked into place. -} - canrename = isEncKey k || isChunkKey k retrieve :: RsyncOpts -> FilePath -> Key -> MeterUpdate -> Annex () retrieve o f k p = - unlessM (rsyncRetrieve o k f (Just p)) $ + unlessM (rsyncRetrieveKey o k f (Just p)) $ giveup "rsync failed" retrieveCheap :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> Annex Bool -retrieveCheap o k _af f = ifM (preseedTmp k f) ( rsyncRetrieve o k f Nothing , return False ) +retrieveCheap o k _af f = ifM (preseedTmp k f) ( rsyncRetrieveKey o k f Nothing , return False ) remove :: RsyncOpts -> Remover -remove o k = do +remove o k = removeGeneric o includes + where + includes = concatMap use dirHashes + use h = let dir = h def k in + [ parentDir dir + , dir + -- match content directory and anything in it + , dir </> keyFile k </> "***" + ] + +{- An empty directory is rsynced to make it delete. Everything is excluded, + - except for the specified includes. Due to the way rsync traverses + - directories, the includes must match both the file to be deleted, and + - its parent directories, but not their other contents. 
-} +removeGeneric :: RsyncOpts -> [String] -> Annex Bool +removeGeneric o includes = do ps <- sendParams withRsyncScratchDir $ \tmp -> liftIO $ do {- Send an empty directory to rysnc to make it delete. -} - let dummy = tmp </> keyFile k - createDirectoryIfMissing True dummy rsync $ rsyncOptions o ++ ps ++ map (\s -> Param $ "--include=" ++ s) includes ++ [ Param "--exclude=*" -- exclude everything else , Param "--quiet", Param "--delete", Param "--recursive" ] ++ partialParams ++ - [ Param $ addTrailingPathSeparator dummy + [ Param $ addTrailingPathSeparator tmp , Param $ rsyncUrl o ] - where - {- Specify include rules to match the directories where the - - content could be. Note that the parent directories have - - to also be explicitly included, due to how rsync - - traverses directories. -} - includes = concatMap use dirHashes - use h = let dir = h def k in - [ parentDir dir - , dir - -- match content directory and anything in it - , dir </> keyFile k </> "***" - ] checkKey :: Git.Repo -> RsyncOpts -> CheckPresent checkKey r o k = do showChecking r + checkPresentGeneric o (rsyncUrls o k) + +checkPresentGeneric :: RsyncOpts -> [RsyncUrl] -> Annex Bool +checkPresentGeneric o rsyncurls = -- note: Does not currently differentiate between rsync failing -- to connect, and the file not being present. - untilTrue (rsyncUrls o k) $ \u -> + untilTrue rsyncurls $ \u -> liftIO $ catchBoolIO $ do withQuietOutput createProcessSuccess $ proc "rsync" $ toCommand $ rsyncOptions o ++ [Param u] return True +storeExportM :: RsyncOpts -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool +storeExportM o src _k loc meterupdate = + storeGeneric o meterupdate basedest populatedest + where + basedest = fromExportLocation loc + populatedest = liftIO . 
createLinkOrCopy src + +retrieveExportM :: RsyncOpts -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Bool +retrieveExportM o _k loc dest p = rsyncRetrieve o [rsyncurl] dest (Just p) + where + rsyncurl = mkRsyncUrl o (fromExportLocation loc) + +checkPresentExportM :: RsyncOpts -> Key -> ExportLocation -> Annex Bool +checkPresentExportM o _k loc = checkPresentGeneric o [rsyncurl] + where + rsyncurl = mkRsyncUrl o (fromExportLocation loc) + +removeExportM :: RsyncOpts -> Key -> ExportLocation -> Annex Bool +removeExportM o _k loc = + removeGeneric o (includes (fromExportLocation loc)) + where + includes f = f : case upFrom f of + Nothing -> [] + Just f' -> includes f' + +removeExportDirectoryM :: RsyncOpts -> ExportDirectory -> Annex Bool +removeExportDirectoryM o ed = removeGeneric o (allbelow d : includes d) + where + d = fromExportDirectory ed + allbelow f = f </> "***" + includes f = f : case upFrom f of + Nothing -> [] + Just f' -> includes f' + +renameExportM :: RsyncOpts -> Key -> ExportLocation -> ExportLocation -> Annex Bool +renameExportM _ _ _ _ = return False + {- Rsync params to enable resumes of sending files safely, - ensure that files are only moved into place once complete -} @@ -259,15 +314,18 @@ withRsyncScratchDir a = do t <- fromRepo gitAnnexTmpObjectDir withTmpDirIn t "rsynctmp" a -rsyncRetrieve :: RsyncOpts -> Key -> FilePath -> Maybe MeterUpdate -> Annex Bool -rsyncRetrieve o k dest meterupdate = - showResumable $ untilTrue (rsyncUrls o k) $ \u -> rsyncRemote Download o meterupdate +rsyncRetrieve :: RsyncOpts -> [RsyncUrl] -> FilePath -> Maybe MeterUpdate -> Annex Bool +rsyncRetrieve o rsyncurls dest meterupdate = + showResumable $ untilTrue rsyncurls $ \u -> rsyncRemote Download o meterupdate -- use inplace when retrieving to support resuming [ Param "--inplace" , Param u , File dest ] +rsyncRetrieveKey :: RsyncOpts -> Key -> FilePath -> Maybe MeterUpdate -> Annex Bool +rsyncRetrieveKey o k dest meterupdate = rsyncRetrieve o 
(rsyncUrls o k) dest meterupdate + showResumable :: Annex Bool -> Annex Bool showResumable a = ifM a ( return True diff --git a/Remote/Rsync/RsyncUrl.hs b/Remote/Rsync/RsyncUrl.hs index c0f30c1fb..67ce7946b 100644 --- a/Remote/Rsync/RsyncUrl.hs +++ b/Remote/Rsync/RsyncUrl.hs @@ -1,6 +1,6 @@ {- Rsync urls. - - - Copyright 2014 Joey Hess + - Copyright 2014-2018 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -36,6 +36,9 @@ rsyncEscape o u | rsyncShellEscape o && rsyncUrlIsShell (rsyncUrl o) = shellEscape u | otherwise = u +mkRsyncUrl :: RsyncOpts -> FilePath -> RsyncUrl +mkRsyncUrl o f = rsyncUrl o </> rsyncEscape o f + rsyncUrls :: RsyncOpts -> Key -> [RsyncUrl] rsyncUrls o k = map use dirHashes where diff --git a/doc/git-annex-export.mdwn b/doc/git-annex-export.mdwn index a8f9f5cb3..1d7170cee 100644 --- a/doc/git-annex-export.mdwn +++ b/doc/git-annex-export.mdwn @@ -29,7 +29,7 @@ Repeated exports are done efficiently, by diffing the old and new tree, and transferring only the changed files, and renaming files as necessary. Exports can be interrupted and resumed. However, partially uploaded files -will be re-started from the beginning. +will be re-started from the beginning in most cases. Once content has been exported to a remote, commands like `git annex get` can download content from there the same as from other remotes. However, diff --git a/doc/special_remotes/rsync.mdwn b/doc/special_remotes/rsync.mdwn index 2271cbc2a..7d30b4092 100644 --- a/doc/special_remotes/rsync.mdwn +++ b/doc/special_remotes/rsync.mdwn @@ -22,6 +22,10 @@ These parameters can be passed to `git annex initremote` to configure rsync: * `keyid` - Specifies the gpg key to use for [[encryption]]. +* `exporttree` - Set to "yes" to make this special remote usable + by [[git-annex-export]]. It will not be usable as a general-purpose + special remote. + * `shellescape` - Optional. Set to "no" to avoid shell escaping normally done when using rsync over ssh. 
That escaping is needed with typical setups, but not with some hosting providers that do not expose rsynced diff --git a/doc/todo/export/comment_8_3b93389a1f50aad0f759d361f06200e9._comment b/doc/todo/export/comment_8_3b93389a1f50aad0f759d361f06200e9._comment new file mode 100644 index 000000000..c68f7161a --- /dev/null +++ b/doc/todo/export/comment_8_3b93389a1f50aad0f759d361f06200e9._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 8""" + date="2018-02-28T15:54:06Z" + content=""" +Remotes need to have a nontrivial amount of code added to them in order to +support export. That had not been done for rsync yet. I've implemented it +now. +"""]] -- cgit v1.2.3