aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2017-08-31 15:41:48 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2017-08-31 15:47:23 -0400
commitc9629ab97875721c8d36bdaceec25768de610b5e (patch)
treeb6a5284cd81765d5097d16b2bad64534e7e3d9bd
parenta7383bc94e41d94e77e67406e1a4085d34241bfc (diff)
implement export.log and resolve export conflicts
Incremental export updates work now too. This commit was sponsored by Anthony DeRobertis on Patreon.
-rw-r--r--Command/Export.hs84
-rw-r--r--Logs.hs4
-rw-r--r--Logs/Export.hs67
-rw-r--r--doc/internals.mdwn8
-rw-r--r--doc/todo/export.mdwn6
-rw-r--r--git-annex.cabal1
6 files changed, 128 insertions, 42 deletions
diff --git a/Command/Export.hs b/Command/Export.hs
index aba8a1877..1310244ac 100644
--- a/Command/Export.hs
+++ b/Command/Export.hs
@@ -11,14 +11,16 @@ import Command
import qualified Git
import qualified Git.DiffTree
import qualified Git.LsTree
+import qualified Git.Ref
import Git.Types
-import Git.Sha
import Git.FilePath
+import Git.Sha
import Types.Key
import Types.Remote
import Annex.Content
import Annex.CatFile
import Logs.Location
+import Logs.Export
import Messages.Progress
import Utility.Tmp
@@ -67,45 +69,46 @@ exportKey sha = mk <$> catKey sha
seek :: ExportOptions -> CommandSeek
seek o = do
r <- getParsed (exportRemote o)
- let oldtreeish = emptyTree -- XXX temporary
-
- -- First, diff the old and new trees and update all changed
- -- files in the export.
- (diff, cleanup) <- inRepo $
- Git.DiffTree.diffTreeRecursive oldtreeish (exportTreeish o)
- seekActions $ pure $ map (startDiff r) diff
- void $ liftIO cleanup
-
- -- In case a previous export was incomplete, make a pass
- -- over the whole tree and export anything that is not
- -- yet exported.
- (l, cleanup') <- inRepo $ Git.LsTree.lsTree (exportTreeish o)
- seekActions $ pure $ map (start r) l
- void $ liftIO cleanup'
+ new <- fromMaybe (error "unknown tree") <$>
+ inRepo (Git.Ref.sha (exportTreeish o))
+ old <- getExport (uuid r)
+
+ when (length old > 1) $
+ warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.."
+
+ -- First, diff the old and new trees and delete all changed
+ -- files in the export. Every file that remains in the export will
+ -- have the content from the new treeish.
+ --
+ -- (Also, when there was an export conflict, this resolves it.)
+ forM_ old $ \oldtreesha -> do
+ (diff, cleanup) <- inRepo $
+ Git.DiffTree.diffTreeRecursive oldtreesha new
+ seekActions $ pure $ map (startUnexport r) diff
+ void $ liftIO cleanup
+
+ -- Waiting until now to record the export guarantees that,
+ -- if this export is interrupted, there are no files left over
+ -- from a previous export, that are not part of this export.
+ recordExport (uuid r) $ ExportChange
+ { oldTreeish = old
+ , newTreeish = new
+ }
-startDiff :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart
-startDiff r diff
- | Git.DiffTree.dstsha diff == nullSha = do
- showStart "unexport" f
- oldk <- exportKey (Git.DiffTree.srcsha diff)
- next $ performUnexport r oldk loc
- | otherwise = do
- showStart "export" f
- k <- exportKey (Git.DiffTree.dstsha diff)
- next $ performExport r k (Git.DiffTree.dstsha diff) loc
- where
- loc = ExportLocation $ toInternalGitPath $
- getTopFilePath $ Git.DiffTree.file diff
- f = getTopFilePath $ Git.DiffTree.file diff
+ -- Export everything that is not yet exported.
+ (l, cleanup') <- inRepo $ Git.LsTree.lsTree new
+ seekActions $ pure $ map (startExport r) l
+ void $ liftIO cleanup'
-start :: Remote -> Git.LsTree.TreeItem -> CommandStart
-start r ti = do
+startExport :: Remote -> Git.LsTree.TreeItem -> CommandStart
+startExport r ti = do
ek <- exportKey (Git.LsTree.sha ti)
- stopUnless (elem (uuid r) <$> loggedLocations (asKey ek)) $
+ stopUnless (notElem (uuid r) <$> loggedLocations (asKey ek)) $ do
+ showStart "export" f
next $ performExport r ek (Git.LsTree.sha ti) loc
where
- loc = ExportLocation $ toInternalGitPath $
- getTopFilePath $ Git.LsTree.file ti
+ loc = ExportLocation $ toInternalGitPath f
+ f = getTopFilePath $ Git.LsTree.file ti
performExport :: Remote -> ExportKey -> Sha -> ExportLocation -> CommandPerform
performExport r ek contentsha loc = case storeExport r of
@@ -137,6 +140,17 @@ cleanupExport r ek = do
logChange (asKey ek) (uuid r) InfoPresent
return True
+startUnexport :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart
+startUnexport r diff
+ | Git.DiffTree.srcsha diff /= nullSha = do
+ showStart "unexport" f
+ oldk <- exportKey (Git.DiffTree.srcsha diff)
+ next $ performUnexport r oldk loc
+ | otherwise = stop
+ where
+ loc = ExportLocation $ toInternalGitPath f
+ f = getTopFilePath $ Git.DiffTree.file diff
+
performUnexport :: Remote -> ExportKey -> ExportLocation -> CommandPerform
performUnexport r ek loc = case removeExport r of
Nothing -> error "remote does not support removing exported files"
diff --git a/Logs.hs b/Logs.hs
index 716520af4..7b6c7dd20 100644
--- a/Logs.hs
+++ b/Logs.hs
@@ -42,6 +42,7 @@ topLevelUUIDBasedLogs =
, activityLog
, differenceLog
, multicastLog
+ , exportLog
]
{- All the ways to get a key from a presence log file -}
@@ -97,6 +98,9 @@ differenceLog = "difference.log"
multicastLog :: FilePath
multicastLog = "multicast.log"
+exportLog :: FilePath
+exportLog = "export.log"
+
{- The pathname of the location log file for a given key. -}
locationLogFile :: GitConfig -> Key -> String
locationLogFile config key = branchHashDir config key </> keyFile key ++ ".log"
diff --git a/Logs/Export.hs b/Logs/Export.hs
new file mode 100644
index 000000000..a0019a06c
--- /dev/null
+++ b/Logs/Export.hs
@@ -0,0 +1,67 @@
+{- git-annex export log
+ -
+ - Copyright 2017 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Logs.Export where
+
+import qualified Data.Map as M
+
+import Annex.Common
+import qualified Annex.Branch
+import qualified Git
+import Logs
+import Logs.UUIDBased
+import Annex.UUID
+
+-- | Get the treeish that was exported to a special remote.
+--
+-- If the list contains multiple items, there was an export conflict,
+-- and different trees were exported to the same special remote.
+getExport :: UUID -> Annex [Git.Ref]
+getExport remoteuuid = nub . mapMaybe get . M.elems . simpleMap
+ . parseLogNew parseExportLog
+ <$> Annex.Branch.get exportLog
+ where
+ get (ExportLog t u)
+ | u == remoteuuid = Just t
+ | otherwise = Nothing
+
+data ExportChange = ExportChange
+ { oldTreeish :: [Git.Ref]
+ , newTreeish :: Git.Ref
+ }
+
+-- | Record a change in what's exported to a special remote.
+--
+-- Any entries in the log for the oldTreeish will be updated to the
+-- newTreeish. This way, when multiple repositories are exporting to
+-- the same special remote, there's no conflict as long as they move
+-- forward in lock-step.
+recordExport :: UUID -> ExportChange -> Annex ()
+recordExport remoteuuid ec = do
+ c <- liftIO currentVectorClock
+ u <- getUUID
+ let val = ExportLog (newTreeish ec) remoteuuid
+ Annex.Branch.change exportLog $
+ showLogNew formatExportLog
+ . changeLog c u val
+ . M.mapWithKey (updateothers c u)
+ . parseLogNew parseExportLog
+ where
+ updateothers c u theiru le@(LogEntry _ (ExportLog t remoteuuid'))
+ | u == theiru || remoteuuid' /= remoteuuid || t `notElem` oldTreeish ec = le
+ | otherwise = LogEntry c (ExportLog (newTreeish ec) theiru)
+
+data ExportLog = ExportLog Git.Ref UUID
+
+formatExportLog :: ExportLog -> String
+formatExportLog (ExportLog treeish remoteuuid) =
+ Git.fromRef treeish ++ " " ++ fromUUID remoteuuid
+
+parseExportLog :: String -> Maybe ExportLog
+parseExportLog s = case words s of
+ (t:u:[]) -> Just $ ExportLog (Git.Ref t) (toUUID u)
+ _ -> Nothing
diff --git a/doc/internals.mdwn b/doc/internals.mdwn
index 7d39b1068..4b24ce443 100644
--- a/doc/internals.mdwn
+++ b/doc/internals.mdwn
@@ -185,8 +185,12 @@ content expression.
Tracks what trees have been exported to special remotes by
[[git-annex-export]](1).
-Each line starts with a timestamp, then the uuid of the special remote,
-followed by the sha1 of the tree that was exported to that special remote.
+Each line starts with a timestamp, then the uuid of the repository
+that exported to the special remote, followed by the sha1 of the tree
+that was exported, and then by the uuid of the special remote. For example:
+
+ 1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55
+ 1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 26339d22-446b-11e0-9101-002170d25c55
(The exported tree is also grafted into the git-annex branch, at
`export.tree`, to prevent git from garbage collecting it. However, the head
diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn
index 354dc84e7..914febe34 100644
--- a/doc/todo/export.mdwn
+++ b/doc/todo/export.mdwn
@@ -17,14 +17,10 @@ there need to be a new interface in supported remotes?
Work is in progress. Todo list:
-* Remember the previously exported tree (in git-annex branch, see design)
- and use to make next export more efficient.
* Only export to remotes that were initialized to support it.
* Prevent using export remotes for key/value storage.
-* When exporting, update location tracking to allow getting from exports,
* Use retrieveExport when getting from export remotes.
* Efficient handling of renames.
-* Detect export conflicts (see design)
* Support export to aditional special remotes (S3 etc)
* Support export to external special remotes.
* If the same content is present in two different files, export
@@ -36,4 +32,4 @@ Work is in progress. Todo list:
And, once one of the files is uploaded, the location log will
say the content is present, so the pass over the tree won't try to
- upload the other file.
+ upload the other file. (See design for a fix for this.)
diff --git a/git-annex.cabal b/git-annex.cabal
index 16b6bda27..a7d062857 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -849,6 +849,7 @@ Executable git-annex
Logs.Config
Logs.Difference
Logs.Difference.Pure
+ Logs.Export
Logs.FsckResults
Logs.Group
Logs.Line