summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2017-09-06 13:39:33 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2017-09-06 13:45:03 -0400
commit9dd2651e8e5efbbf3a9cc59cab3afa1fef7446f2 (patch)
tree31afde6ea8072120ed0bf7643b2f2d968d95b69d
parentf1b255623bc026d1480d44808cfc30507537cda1 (diff)
record incomplete exports in export.log
Not yet used, but essential for resuming cleanly. Note that, in normal operation, only one commit is made to export.log during an export; the incomplete version only gets to the journal and is then overwritten. This commit was supported by the NSF-funded DataLad project.
-rw-r--r--Command/Export.hs7
-rw-r--r--Logs/Export.hs56
-rw-r--r--doc/internals.mdwn15
3 files changed, 60 insertions, 18 deletions
diff --git a/Command/Export.hs b/Command/Export.hs
index 3387a14ad..878cda8e3 100644
--- a/Command/Export.hs
+++ b/Command/Export.hs
@@ -79,9 +79,10 @@ seek o = do
inRepo (Git.Ref.tree (exportTreeish o))
old <- getExport (uuid r)
+ recordExportBeginning (uuid r) new
when (length old > 1) $
warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.."
-
+
db <- openDb (uuid r)
-- First, diff the old and new trees and delete all changed
@@ -89,7 +90,7 @@ seek o = do
-- have the content from the new treeish.
--
-- (Also, when there was an export conflict, this resolves it.)
- forM_ old $ \oldtreesha -> do
+ forM_ (map exportedTreeish old) $ \oldtreesha -> do
(diff, cleanup) <- inRepo $
Git.DiffTree.diffTreeRecursive oldtreesha new
seekActions $ pure $ map (startUnexport r db) diff
@@ -99,7 +100,7 @@ seek o = do
-- if this export is interrupted, there are no files left over
-- from a previous export, that are not part of this export.
recordExport (uuid r) $ ExportChange
- { oldTreeish = old
+ { oldTreeish = map exportedTreeish old
, newTreeish = new
}
diff --git a/Logs/Export.hs b/Logs/Export.hs
index 1fd1460fc..3ba77cd24 100644
--- a/Logs/Export.hs
+++ b/Logs/Export.hs
@@ -14,22 +14,29 @@ import qualified Annex.Branch
import qualified Git
import qualified Git.Branch
import Git.Tree
+import Git.Sha
import Git.FilePath
import Logs
import Logs.UUIDBased
import Annex.UUID
--- | Get the treeish that was exported to a special remote.
+data Exported = Exported
+ { exportedTreeish :: Git.Ref
+ , incompleteExportedTreeish :: [Git.Ref]
+ }
+ deriving (Eq)
+
+-- | Get what's been exported to a special remote.
--
-- If the list contains multiple items, there was an export conflict,
-- and different trees were exported to the same special remote.
-getExport :: UUID -> Annex [Git.Ref]
+getExport :: UUID -> Annex [Exported]
getExport remoteuuid = nub . mapMaybe get . M.elems . simpleMap
. parseLogNew parseExportLog
<$> Annex.Branch.get exportLog
where
- get (ExportLog t u)
- | u == remoteuuid = Just t
+ get (ExportLog exported u)
+ | u == remoteuuid = Just exported
| otherwise = Nothing
data ExportChange = ExportChange
@@ -39,6 +46,10 @@ data ExportChange = ExportChange
-- | Record a change in what's exported to a special remote.
--
+-- This is called before an export begins uploading new files to the
+-- remote, but after it's cleaned up any files that need to be deleted
+-- from the old treeish.
+--
-- Any entries in the log for the oldTreeish will be updated to the
-- newTreeish. This way, when multiple repositories are exporting to
-- the same special remote, there's no conflict as long as they move
@@ -50,27 +61,48 @@ recordExport :: UUID -> ExportChange -> Annex ()
recordExport remoteuuid ec = do
c <- liftIO currentVectorClock
u <- getUUID
- let val = ExportLog (newTreeish ec) remoteuuid
+ let val = ExportLog (Exported (newTreeish ec) []) remoteuuid
Annex.Branch.change exportLog $
showLogNew formatExportLog
. changeLog c u val
. M.mapWithKey (updateothers c u)
. parseLogNew parseExportLog
- graftTreeish (newTreeish ec)
where
- updateothers c u theiru le@(LogEntry _ (ExportLog t remoteuuid'))
+ updateothers c u theiru le@(LogEntry _ (ExportLog exported@(Exported { exportedTreeish = t }) remoteuuid'))
| u == theiru || remoteuuid' /= remoteuuid || t `notElem` oldTreeish ec = le
- | otherwise = LogEntry c (ExportLog (newTreeish ec) theiru)
+ | otherwise = LogEntry c (ExportLog (exported { exportedTreeish = newTreeish ec }) theiru)
+
+-- | Record the beginning of an export, to allow cleaning up from
+-- interrupted exports.
+--
+-- This is called before any changes are made to the remote.
+recordExportBeginning :: UUID -> Git.Ref -> Annex ()
+recordExportBeginning remoteuuid newtree = do
+ c <- liftIO currentVectorClock
+ u <- getUUID
+ ExportLog old _ <- fromMaybe (ExportLog (Exported emptyTree []) remoteuuid)
+ . M.lookup u . simpleMap
+ . parseLogNew parseExportLog
+ <$> Annex.Branch.get exportLog
+ let new = old { incompleteExportedTreeish = newtree:incompleteExportedTreeish old }
+ Annex.Branch.change exportLog $
+ showLogNew formatExportLog
+ . changeLog c u (ExportLog new remoteuuid)
+ . parseLogNew parseExportLog
+ graftTreeish newtree
-data ExportLog = ExportLog Git.Ref UUID
+data ExportLog = ExportLog Exported UUID
formatExportLog :: ExportLog -> String
-formatExportLog (ExportLog treeish remoteuuid) =
- Git.fromRef treeish ++ " " ++ fromUUID remoteuuid
+formatExportLog (ExportLog exported remoteuuid) = unwords $
+ [ Git.fromRef (exportedTreeish exported)
+ , fromUUID remoteuuid
+ ] ++ map Git.fromRef (incompleteExportedTreeish exported)
parseExportLog :: String -> Maybe ExportLog
parseExportLog s = case words s of
- (t:u:[]) -> Just $ ExportLog (Git.Ref t) (toUUID u)
+ (et:u:it) -> Just $
+ ExportLog (Exported (Git.Ref et) (map Git.Ref it)) (toUUID u)
_ -> Nothing
-- To prevent git-annex branch merge conflicts, the treeish is
diff --git a/doc/internals.mdwn b/doc/internals.mdwn
index 4b24ce443..ccf1e09b6 100644
--- a/doc/internals.mdwn
+++ b/doc/internals.mdwn
@@ -187,12 +187,21 @@ Tracks what trees have been exported to special remotes by
Each line starts with a timestamp, then the uuid of the repository
that exported to the special remote, followed by the sha1 of the tree
-that was exported, and then by the uuid of the special remote. For example:
+that was exported, and then by the uuid of the special remote.
- 1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55
+There can also be subsequent sha1s, of trees that have started to be
+exported but whose export is not yet complete. The sha1 of the exported
+tree can be the empty tree (4b825dc642cb6eb9a060e54bf8d69288fbee4904)
+in order to record the beginning of the first export.
+
+For example:
+
+ 1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 4b825dc642cb6eb9a060e54bf8d69288fbee4904 26339d22-446b-11e0-9101-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b
+ 1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55
+ 1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55 7c7af825782b7c8706039b855c72709993542be4
1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 26339d22-446b-11e0-9101-002170d25c55
-(The exported tree is also grafted into the git-annex branch, at
+(The trees are also grafted into the git-annex branch, at
`export.tree`, to prevent git from garbage collecting it. However, the head
of the git-annex branch should never contain such a grafted in tree;
the grafted tree is removed in the same commit that updates `export.log`.)