diff options
author | Joey Hess <joeyh@joeyh.name> | 2017-09-04 14:33:09 -0400 |
---|---|---|
committer | Joey Hess <joeyh@joeyh.name> | 2017-09-04 14:39:32 -0400 |
commit | 25ed1e54abcc25f729fed016ec77a8cd049142fa (patch) | |
tree | f6130202fc2f23a6b641f8646705c900b757a012 /Command | |
parent | 3db6fd7621d3fa57bebfea216019e522f6d8d781 (diff) |
use export db to correctly handle duplicate files
Removed uncorrect UniqueKey key in db schema; a key can appear multiple
times with different files.
The database has to be flushed after each removal. But when adding files
to the export, lots of changes are able to be queued up w/o flushing.
So it's still fairly efficient.
If large removals of files from exports are too slow, an alternative
would be to make two passes over the diff, one pass queueing deletions
from the database, then a flush and the a second pass updating the
location log. But that would use more memory, and need to look up
exportKey twice per removed file, so I've avoided such optimisation yet.
This commit was supported by the NSF-funded DataLad project.
Diffstat (limited to 'Command')
-rw-r--r-- | Command/Export.hs | 41 |
1 files changed, 23 insertions, 18 deletions
diff --git a/Command/Export.hs b/Command/Export.hs index c09253dc9..3387a14ad 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -113,21 +113,20 @@ seek o = do startExport :: Remote -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart startExport r db ti = do ek <- exportKey (Git.LsTree.sha ti) - liftIO $ addExportLocation db (asKey ek) loc - stopUnless (notElem (uuid r) <$> loggedLocations (asKey ek)) $ do + stopUnless (liftIO $ notElem loc <$> getExportLocation db (asKey ek)) $ do showStart "export" f - next $ performExport r ek (Git.LsTree.sha ti) loc + next $ performExport r db ek (Git.LsTree.sha ti) loc where loc = ExportLocation $ toInternalGitPath f f = getTopFilePath $ Git.LsTree.file ti -performExport :: Remote -> ExportKey -> Sha -> ExportLocation -> CommandPerform -performExport r ek contentsha loc = do +performExport :: Remote -> ExportHandle -> ExportKey -> Sha -> ExportLocation -> CommandPerform +performExport r db ek contentsha loc = do let storer = storeExport $ exportActions r sent <- case ek of AnnexKey k -> ifM (inAnnex k) ( metered Nothing k $ \m -> do - let rollback = void $ performUnexport r ek loc + let rollback = void $ performUnexport r db ek loc sendAnnex k rollback (\f -> storer f k loc m) , do @@ -142,11 +141,12 @@ performExport r ek contentsha loc = do liftIO $ hClose h storer tmp sha1k loc m if sent - then next $ cleanupExport r ek + then next $ cleanupExport r db ek loc else stop -cleanupExport :: Remote -> ExportKey -> CommandCleanup -cleanupExport r ek = do +cleanupExport :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> CommandCleanup +cleanupExport r db ek loc = do + liftIO $ addExportLocation db (asKey ek) loc logChange (asKey ek) (uuid r) InfoPresent return True @@ -154,23 +154,28 @@ startUnexport :: Remote -> ExportHandle -> Git.DiffTree.DiffTreeItem -> CommandS startUnexport r db diff | Git.DiffTree.srcsha diff /= nullSha = do showStart "unexport" f - oldk <- exportKey (Git.DiffTree.srcsha diff) - liftIO $ removeExportLocation db (asKey oldk) loc - next $ performUnexport r oldk loc + ek <- exportKey (Git.DiffTree.srcsha diff) + next $ performUnexport r db ek loc | otherwise = stop where loc = ExportLocation $ toInternalGitPath f f = getTopFilePath $ Git.DiffTree.file diff -performUnexport :: Remote -> ExportKey -> ExportLocation -> CommandPerform -performUnexport r ek loc = do +performUnexport :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> CommandPerform +performUnexport r db ek loc = do let remover = removeExport $ exportActions r ok <- remover (asKey ek) loc if ok - then next $ cleanupUnexport r ek + then next $ cleanupUnexport r db ek loc else stop -cleanupUnexport :: Remote -> ExportKey -> CommandCleanup -cleanupUnexport r ek = do - logChange (asKey ek) (uuid r) InfoMissing +cleanupUnexport :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> CommandCleanup +cleanupUnexport r db ek loc = do + liftIO $ do + removeExportLocation db (asKey ek) loc + -- Flush so that getExportLocation sees this and any + -- other removals of the key. + flushDbQueue db + whenM (liftIO $ null <$> getExportLocation db (asKey ek)) $ + logChange (asKey ek) (uuid r) InfoMissing return True |