summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2017-09-07 14:32:47 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2017-09-07 14:32:47 -0400
commit6e7dcece6022e5078c2ef2c25d94ee1f23419234 (patch)
tree70c1b4c4ac3ed56bb2bf4f3b358aa1033283565f
parentd910a94df7d6f5c87897c248586cb65523457f99 (diff)
avoid renaming to temp files before deleting
Only rename when actually ncessary. The diff gets buffered in memory. Probably git has to buffer a diff in memory when generating it as well, so this memory usage should not be a problem, even when the diff is very large. I hope. This commit was supported by the NSF-funded DataLad project.
-rw-r--r--Command/Export.hs90
-rw-r--r--doc/todo/export.mdwn3
2 files changed, 61 insertions, 32 deletions
diff --git a/Command/Export.hs b/Command/Export.hs
index 2cf453ea1..09878dabf 100644
--- a/Command/Export.hs
+++ b/Command/Export.hs
@@ -5,6 +5,8 @@
- Licensed under the GNU GPL version 3 or higher.
-}
+{-# LANGUAGE TupleSections #-}
+
module Command.Export where
import Command
@@ -26,6 +28,7 @@ import Messages.Progress
import Utility.Tmp
import qualified Data.ByteString.Lazy as L
+import qualified Data.Map as M
cmd :: Command
cmd = command "export" SectionCommon
@@ -49,7 +52,7 @@ optParser _ = ExportOptions
-- An export includes both annexed files and files stored in git.
-- For the latter, a SHA1 key is synthesized.
data ExportKey = AnnexKey Key | GitKey Key
- deriving (Show)
+ deriving (Show, Eq, Ord)
asKey :: ExportKey -> Key
asKey (AnnexKey k) = k
@@ -103,17 +106,22 @@ seek o = do
case map exportedTreeish old of
[] -> return ()
[oldtreesha] -> do
- -- Rename all old files to temp.
- mapdiff
- (\diff -> startMoveToTempName r db (Git.DiffTree.file diff) (Git.DiffTree.srcsha diff))
- oldtreesha new
+ diffmap <- mkDiffMap oldtreesha new
+ let seekdiffmap a = seekActions $ pure $ map a (M.toList diffmap)
+ -- Rename old files to temp, or delete.
+ seekdiffmap $ \(ek, (moldf, mnewf)) ->
+ case (moldf, mnewf) of
+ (Just oldf, Just _newf) ->
+ startMoveToTempName r db oldf ek
+ (Just oldf, Nothing) ->
+ startUnexport' r db oldf ek
+ _ -> stop
-- Rename from temp to new files.
- mapdiff (\diff -> startMoveFromTempName r db (Git.DiffTree.dstsha diff) (Git.DiffTree.file diff))
- oldtreesha new
- -- Remove all remaining temps.
- mapdiff
- (startUnexportTempName r db . Git.DiffTree.srcsha)
- oldtreesha new
+ seekdiffmap $ \(ek, (moldf, mnewf)) ->
+ case (moldf, mnewf) of
+ (Just _oldf, Just newf) ->
+ startMoveFromTempName r db ek newf
+ _ -> stop
ts -> do
warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.."
forM_ ts $ \oldtreesha -> do
@@ -126,7 +134,7 @@ seek o = do
, Git.DiffTree.dstsha d
]
-- Don't rename to temp, because the
- -- content is unknown; unexport instead.
+ -- content is unknown; delete instead.
mapdiff
(\diff -> startUnexport r db (Git.DiffTree.file diff) (unexportboth diff))
oldtreesha new
@@ -152,6 +160,28 @@ seek o = do
seekActions $ pure $ map a diff
void $ liftIO cleanup
+-- Map of old and new filenames for each changed ExportKey in a diff.
+type DiffMap = M.Map ExportKey (Maybe TopFilePath, Maybe TopFilePath)
+
+mkDiffMap :: Git.Ref -> Git.Ref -> Annex DiffMap
+mkDiffMap old new = do
+ (diff, cleanup) <- inRepo $ Git.DiffTree.diffTreeRecursive old new
+ diffmap <- M.fromListWith combinedm . concat <$> forM diff mkdm
+ void $ liftIO cleanup
+ return diffmap
+ where
+ combinedm (srca, dsta) (srcb, dstb) = (srca <|> srcb, dsta <|> dstb)
+ mkdm i = do
+ srcek <- getk (Git.DiffTree.srcsha i)
+ dstek <- getk (Git.DiffTree.dstsha i)
+ return $ catMaybes
+ [ (, (Just (Git.DiffTree.file i), Nothing)) <$> srcek
+ , (, (Nothing, Just (Git.DiffTree.file i))) <$> dstek
+ ]
+ getk sha
+ | sha == nullSha = return Nothing
+ | otherwise = Just <$> exportKey sha
+
startExport :: Remote -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart
startExport r db ti = do
ek <- exportKey (Git.LsTree.sha ti)
@@ -204,6 +234,14 @@ startUnexport r db f shas = do
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f
+startUnexport' :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
+startUnexport' r db f ek = do
+ showStart "unexport" f'
+ next $ performUnexport r db [ek] loc
+ where
+ loc = ExportLocation $ toInternalGitPath f'
+ f' = getTopFilePath f
+
performUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandPerform
performUnexport r db eks loc = do
ifM (allM (\ek -> removeExport (exportActions r) (asKey ek) loc) eks)
@@ -236,27 +274,21 @@ startUnexportTempName r db sha
showStart "unexport" f
next $ performUnexport r db [ek] loc
-startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> Git.Sha -> CommandStart
-startMoveToTempName r db f sha
- | sha == nullSha = stop
- | otherwise = do
- ek <- exportKey sha
- let tmploc@(ExportLocation tmpf) = exportTempName ek
- showStart "rename" (f' ++ " -> " ++ tmpf)
- next $ performRename r db ek loc tmploc
+startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
+startMoveToTempName r db f ek = do
+ let tmploc@(ExportLocation tmpf) = exportTempName ek
+ showStart "rename" (f' ++ " -> " ++ tmpf)
+ next $ performRename r db ek loc tmploc
where
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f
-startMoveFromTempName :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart
-startMoveFromTempName r db sha f
- | sha == nullSha = stop
- | otherwise = do
- ek <- exportKey sha
- let tmploc@(ExportLocation tmpf) = exportTempName ek
- stopUnless (liftIO $ elem tmploc <$> getExportLocation db (asKey ek)) $ do
- showStart "rename" (tmpf ++ " -> " ++ f')
- next $ performRename r db ek tmploc loc
+startMoveFromTempName :: Remote -> ExportHandle -> ExportKey -> TopFilePath -> CommandStart
+startMoveFromTempName r db ek f = do
+ let tmploc@(ExportLocation tmpf) = exportTempName ek
+ stopUnless (liftIO $ elem tmploc <$> getExportLocation db (asKey ek)) $ do
+ showStart "rename" (tmpf ++ " -> " ++ f')
+ next $ performRename r db ek tmploc loc
where
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f
diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn
index 8f5c3f8f1..c4e57bd1c 100644
--- a/doc/todo/export.mdwn
+++ b/doc/todo/export.mdwn
@@ -24,8 +24,5 @@ Work is in progress. Todo list:
export from another repository also doesn't work right, because the
export database is not populated. So, seems that the export database needs
to get populated based on the export log in these cases.
-* Currently all modified/deleted files are renamed to temp files,
- even when they won't be used. Avoid doing this unless the
- temp file will be renamed to the new filename.
* Support export to aditional special remotes (S3 etc)
* Support export to external special remotes.