From 25ed1e54abcc25f729fed016ec77a8cd049142fa Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Mon, 4 Sep 2017 14:33:09 -0400
Subject: use export db to correctly handle duplicate files

Removed incorrect UniqueKey key in db schema; a key can appear multiple
times with different files.

The database has to be flushed after each removal. But when adding files
to the export, lots of changes can be queued up without flushing, so it's
still fairly efficient.

If large removals of files from exports are too slow, an alternative would
be to make two passes over the diff: one pass queueing deletions from the
database, then a flush, and then a second pass updating the location log.
But that would use more memory, and would need to look up exportKey twice
per removed file, so I've avoided that optimisation for now.

This commit was supported by the NSF-funded DataLad project.
---
 Database/Export.hs | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Database/Export.hs b/Database/Export.hs
index e2986d075..dcef88854 100644
--- a/Database/Export.hs
+++ b/Database/Export.hs
@@ -16,6 +16,7 @@ module Database.Export (
 	closeDb,
 	addExportLocation,
 	removeExportLocation,
+	flushDbQueue,
 	getExportLocation,
 	ExportedId,
 ) where
@@ -37,7 +38,6 @@ Exported
   key IKey
   file SFilePath
   KeyFileIndex key file
-  UniqueKey key
 |]
 
 {- Opens the database, creating it if it doesn't exist yet. -}
@@ -74,7 +74,10 @@ removeExportLocation h k (ExportLocation f) = queueDb h $
 	ik = toIKey k
 	ef = toSFilePath f
 
-{- Doesn't know about recently queued changes. -}
+flushDbQueue :: ExportHandle -> IO ()
+flushDbQueue (ExportHandle h) = H.flushDbQueue h
+
+{- Note that this does not see recently queued changes. -}
 getExportLocation :: ExportHandle -> Key -> IO [ExportLocation]
 getExportLocation (ExportHandle h) k = H.queryDbQueue h $ do
 	l <- select $ from $ \r -> do
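
The flush discipline described in the commit message can be illustrated with a
short sketch. This is not code from the commit; it only shows how a caller
might drive the Database.Export operations named in the diff (addExportLocation,
removeExportLocation, flushDbQueue), assuming addExportLocation takes the same
arguments as removeExportLocation. The Change type, the applyDiff helper, and
the import paths for Key and ExportLocation are assumptions made for the example.

module ExportFlushSketch where

import Database.Export (ExportHandle, addExportLocation,
	removeExportLocation, flushDbQueue)
-- Assumed import locations; these types may live elsewhere in git-annex.
import Types.Key (Key)
import Types.Export (ExportLocation)

-- Hypothetical representation of one entry in the diff of an export tree.
data Change
	= Added Key ExportLocation
	| Deleted Key ExportLocation

-- Additions are only queued, so many of them can be batched cheaply and the
-- queue flushed once at the end. Each removal is followed by an immediate
-- flush, because the location log update that follows a removal must not
-- rely on a stale database.
applyDiff :: ExportHandle -> [Change] -> IO ()
applyDiff h changes = do
	mapM_ go changes
	flushDbQueue h
  where
	go (Added k loc) = addExportLocation h k loc
	go (Deleted k loc) = do
		removeExportLocation h k loc
		flushDbQueue h

The two-pass alternative mentioned above would instead queue all the deletions
in a first traversal of the diff, flush once, and only then update the location
log in a second traversal, at the cost of more memory and a second exportKey
lookup per removed file.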