diff options
author | Joey Hess <joeyh@joeyh.name> | 2017-08-31 15:41:48 -0400 |
---|---|---|
committer | Joey Hess <joeyh@joeyh.name> | 2017-08-31 15:47:23 -0400 |
commit | c9629ab97875721c8d36bdaceec25768de610b5e (patch) | |
tree | b6a5284cd81765d5097d16b2bad64534e7e3d9bd | |
parent | a7383bc94e41d94e77e67406e1a4085d34241bfc (diff) |
implement export.log and resolve export conflicts
Incremental export updates work now too.
This commit was sponsored by Anthony DeRobertis on Patreon.
-rw-r--r-- | Command/Export.hs | 84 | ||||
-rw-r--r-- | Logs.hs | 4 | ||||
-rw-r--r-- | Logs/Export.hs | 67 | ||||
-rw-r--r-- | doc/internals.mdwn | 8 | ||||
-rw-r--r-- | doc/todo/export.mdwn | 6 | ||||
-rw-r--r-- | git-annex.cabal | 1 |
6 files changed, 128 insertions, 42 deletions
diff --git a/Command/Export.hs b/Command/Export.hs index aba8a1877..1310244ac 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -11,14 +11,16 @@ import Command import qualified Git import qualified Git.DiffTree import qualified Git.LsTree +import qualified Git.Ref import Git.Types -import Git.Sha import Git.FilePath +import Git.Sha import Types.Key import Types.Remote import Annex.Content import Annex.CatFile import Logs.Location +import Logs.Export import Messages.Progress import Utility.Tmp @@ -67,45 +69,46 @@ exportKey sha = mk <$> catKey sha seek :: ExportOptions -> CommandSeek seek o = do r <- getParsed (exportRemote o) - let oldtreeish = emptyTree -- XXX temporary - - -- First, diff the old and new trees and update all changed - -- files in the export. - (diff, cleanup) <- inRepo $ - Git.DiffTree.diffTreeRecursive oldtreeish (exportTreeish o) - seekActions $ pure $ map (startDiff r) diff - void $ liftIO cleanup - - -- In case a previous export was incomplete, make a pass - -- over the whole tree and export anything that is not - -- yet exported. - (l, cleanup') <- inRepo $ Git.LsTree.lsTree (exportTreeish o) - seekActions $ pure $ map (start r) l - void $ liftIO cleanup' + new <- fromMaybe (error "unknown tree") <$> + inRepo (Git.Ref.sha (exportTreeish o)) + old <- getExport (uuid r) + + when (length old > 1) $ + warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.." + + -- First, diff the old and new trees and delete all changed + -- files in the export. Every file that remains in the export will + -- have the content from the new treeish. + -- + -- (Also, when there was an export conflict, this resolves it.) + forM_ old $ \oldtreesha -> do + (diff, cleanup) <- inRepo $ + Git.DiffTree.diffTreeRecursive oldtreesha new + seekActions $ pure $ map (startUnexport r) diff + void $ liftIO cleanup + + -- Waiting until now to record the export guarantees that, + -- if this export is interrupted, there are no files left over + -- from a previous export, that are not part of this export. + recordExport (uuid r) $ ExportChange + { oldTreeish = old + , newTreeish = new + } -startDiff :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart -startDiff r diff - | Git.DiffTree.dstsha diff == nullSha = do - showStart "unexport" f - oldk <- exportKey (Git.DiffTree.srcsha diff) - next $ performUnexport r oldk loc - | otherwise = do - showStart "export" f - k <- exportKey (Git.DiffTree.dstsha diff) - next $ performExport r k (Git.DiffTree.dstsha diff) loc - where - loc = ExportLocation $ toInternalGitPath $ - getTopFilePath $ Git.DiffTree.file diff - f = getTopFilePath $ Git.DiffTree.file diff + -- Export everything that is not yet exported. + (l, cleanup') <- inRepo $ Git.LsTree.lsTree new + seekActions $ pure $ map (startExport r) l + void $ liftIO cleanup' -start :: Remote -> Git.LsTree.TreeItem -> CommandStart -start r ti = do +startExport :: Remote -> Git.LsTree.TreeItem -> CommandStart +startExport r ti = do ek <- exportKey (Git.LsTree.sha ti) - stopUnless (elem (uuid r) <$> loggedLocations (asKey ek)) $ + stopUnless (notElem (uuid r) <$> loggedLocations (asKey ek)) $ do + showStart "export" f next $ performExport r ek (Git.LsTree.sha ti) loc where - loc = ExportLocation $ toInternalGitPath $ - getTopFilePath $ Git.LsTree.file ti + loc = ExportLocation $ toInternalGitPath f + f = getTopFilePath $ Git.LsTree.file ti performExport :: Remote -> ExportKey -> Sha -> ExportLocation -> CommandPerform performExport r ek contentsha loc = case storeExport r of @@ -137,6 +140,17 @@ cleanupExport r ek = do logChange (asKey ek) (uuid r) InfoPresent return True +startUnexport :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart +startUnexport r diff + | Git.DiffTree.srcsha diff /= nullSha = do + showStart "unexport" f + oldk <- exportKey (Git.DiffTree.srcsha diff) + next $ performUnexport r oldk loc + | otherwise = stop + where + loc = ExportLocation $ toInternalGitPath f + f = getTopFilePath $ Git.DiffTree.file diff + performUnexport :: Remote -> ExportKey -> ExportLocation -> CommandPerform performUnexport r ek loc = case removeExport r of Nothing -> error "remote does not support removing exported files" @@ -42,6 +42,7 @@ topLevelUUIDBasedLogs = , activityLog , differenceLog , multicastLog + , exportLog ] {- All the ways to get a key from a presence log file -} @@ -97,6 +98,9 @@ differenceLog = "difference.log" multicastLog :: FilePath multicastLog = "multicast.log" +exportLog :: FilePath +exportLog = "export.log" + {- The pathname of the location log file for a given key. -} locationLogFile :: GitConfig -> Key -> String locationLogFile config key = branchHashDir config key </> keyFile key ++ ".log" diff --git a/Logs/Export.hs b/Logs/Export.hs new file mode 100644 index 000000000..a0019a06c --- /dev/null +++ b/Logs/Export.hs @@ -0,0 +1,67 @@ +{- git-annex export log + - + - Copyright 2017 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Export where + +import qualified Data.Map as M + +import Annex.Common +import qualified Annex.Branch +import qualified Git +import Logs +import Logs.UUIDBased +import Annex.UUID + +-- | Get the treeish that was exported to a special remote. +-- +-- If the list contains multiple items, there was an export conflict, +-- and different trees were exported to the same special remote. +getExport :: UUID -> Annex [Git.Ref] +getExport remoteuuid = nub . mapMaybe get . M.elems . simpleMap + . parseLogNew parseExportLog + <$> Annex.Branch.get exportLog + where + get (ExportLog t u) + | u == remoteuuid = Just t + | otherwise = Nothing + +data ExportChange = ExportChange + { oldTreeish :: [Git.Ref] + , newTreeish :: Git.Ref + } + +-- | Record a change in what's exported to a special remote. +-- +-- Any entries in the log for the oldTreeish will be updated to the +-- newTreeish. This way, when multiple repositories are exporting to +-- the same special remote, there's no conflict as long as they move +-- forward in lock-step. +recordExport :: UUID -> ExportChange -> Annex () +recordExport remoteuuid ec = do + c <- liftIO currentVectorClock + u <- getUUID + let val = ExportLog (newTreeish ec) remoteuuid + Annex.Branch.change exportLog $ + showLogNew formatExportLog + . changeLog c u val + . M.mapWithKey (updateothers c u) + . parseLogNew parseExportLog + where + updateothers c u theiru le@(LogEntry _ (ExportLog t remoteuuid')) + | u == theiru || remoteuuid' /= remoteuuid || t `notElem` oldTreeish ec = le + | otherwise = LogEntry c (ExportLog (newTreeish ec) theiru) + +data ExportLog = ExportLog Git.Ref UUID + +formatExportLog :: ExportLog -> String +formatExportLog (ExportLog treeish remoteuuid) = + Git.fromRef treeish ++ " " ++ fromUUID remoteuuid + +parseExportLog :: String -> Maybe ExportLog +parseExportLog s = case words s of + (t:u:[]) -> Just $ ExportLog (Git.Ref t) (toUUID u) + _ -> Nothing diff --git a/doc/internals.mdwn b/doc/internals.mdwn index 7d39b1068..4b24ce443 100644 --- a/doc/internals.mdwn +++ b/doc/internals.mdwn @@ -185,8 +185,12 @@ content expression. Tracks what trees have been exported to special remotes by [[git-annex-export]](1). -Each line starts with a timestamp, then the uuid of the special remote, -followed by the sha1 of the tree that was exported to that special remote. +Each line starts with a timestamp, then the uuid of the repository +that exported to the special remote, followed by the sha1 of the tree +that was exported, and then by the uuid of the special remote. For example: + + 1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55 + 1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 26339d22-446b-11e0-9101-002170d25c55 (The exported tree is also grafted into the git-annex branch, at `export.tree`, to prevent git from garbage collecting it. However, the head diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn index 354dc84e7..914febe34 100644 --- a/doc/todo/export.mdwn +++ b/doc/todo/export.mdwn @@ -17,14 +17,10 @@ there need to be a new interface in supported remotes? Work is in progress. Todo list: -* Remember the previously exported tree (in git-annex branch, see design) - and use to make next export more efficient. * Only export to remotes that were initialized to support it. * Prevent using export remotes for key/value storage. -* When exporting, update location tracking to allow getting from exports, * Use retrieveExport when getting from export remotes. * Efficient handling of renames. -* Detect export conflicts (see design) * Support export to aditional special remotes (S3 etc) * Support export to external special remotes. * If the same content is present in two different files, export @@ -36,4 +32,4 @@ Work is in progress. Todo list: And, once one of the files is uploaded, the location log will say the content is present, so the pass over the tree won't try to - upload the other file. + upload the other file. (See design for a fix for this.) diff --git a/git-annex.cabal b/git-annex.cabal index 16b6bda27..a7d062857 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -849,6 +849,7 @@ Executable git-annex Logs.Config Logs.Difference Logs.Difference.Pure + Logs.Export Logs.FsckResults Logs.Group Logs.Line |