summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2017-09-18 18:40:16 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2017-09-18 19:21:41 -0400
commit36533ce176ec653f62fe740f9907f527e4f36361 (patch)
tree0c507668c7d19950efa7877ca2155e71ccdb1472
parent7576fff0131d4f86dc495d58f62490c3264e0e54 (diff)
merge changes made on other repos into ExportTree
Now when one repository has exported a tree, another repository can get files from the export, after syncing. There's a bug: While the database update works, somehow the database on disk does not get updated, and so the database update is run the next time, etc. Wasn't able to figure out why yet. This commit was sponsored by Ole-Morten Duesund on Patreon.
-rw-r--r--Annex/Locations.hs2
-rw-r--r--Command/Export.hs28
-rw-r--r--Database/Export.hs52
-rw-r--r--Remote/Helper/Export.hs61
-rw-r--r--doc/todo/export.mdwn34
5 files changed, 90 insertions, 87 deletions
diff --git a/Annex/Locations.hs b/Annex/Locations.hs
index 947cceef9..f86dfc6f4 100644
--- a/Annex/Locations.hs
+++ b/Annex/Locations.hs
@@ -303,7 +303,7 @@ gitAnnexExportDbDir u r = gitAnnexExportDir u r </> "db"
{- Lock file for export state for a special remote. -}
gitAnnexExportLock :: UUID -> Git.Repo -> FilePath
-gitAnnexExportLock u r = gitAnnexExportDir u r ++ ".lck"
+gitAnnexExportLock u r = gitAnnexExportDbDir u r ++ ".lck"
{- .git/annex/schedulestate is used to store information about when
- scheduled jobs were last run. -}
diff --git a/Command/Export.hs b/Command/Export.hs
index 811e2351a..02c64eadf 100644
--- a/Command/Export.hs
+++ b/Command/Export.hs
@@ -27,7 +27,6 @@ import Annex.LockFile
import Logs.Location
import Logs.Export
import Database.Export
-import Remote.Helper.Export
import Messages.Progress
import Utility.Tmp
@@ -129,7 +128,7 @@ seek' o r = do
(\diff -> startUnexport r ea db (Git.DiffTree.file diff) (unexportboth diff))
oldtreesha new
updateExportTree db emptyTree new
- liftIO $ recordDataSource db new
+ liftIO $ recordExportTreeCurrent db new
-- Waiting until now to record the export guarantees that,
-- if this export is interrupted, there are no files left over
@@ -312,3 +311,28 @@ cleanupRename ea db ek src dest = do
if exportDirectories src /= exportDirectories dest
then removeEmptyDirectories ea db src [asKey ek]
else return True
+
+-- | Remove empty directories from the export. Call after removing an
+-- exported file, and after calling removeExportLocation and flushing the
+-- database.
+removeEmptyDirectories :: ExportActions Annex -> ExportHandle -> ExportLocation -> [Key] -> Annex Bool
+removeEmptyDirectories ea db loc ks
+ | null (exportDirectories loc) = return True
+ | otherwise = case removeExportDirectory ea of
+ Nothing -> return True
+ Just removeexportdirectory -> do
+ ok <- allM (go removeexportdirectory)
+ (reverse (exportDirectories loc))
+ unless ok $ liftIO $ do
+ -- Add location back to export database,
+ -- so this is tried again next time.
+ forM_ ks $ \k ->
+ addExportedLocation db k loc
+ flushDbQueue db
+ return ok
+ where
+ go removeexportdirectory d =
+ ifM (liftIO $ isExportDirectoryEmpty db d)
+ ( removeexportdirectory d
+ , return True
+ )
diff --git a/Database/Export.hs b/Database/Export.hs
index ad106f84e..322ab48fd 100644
--- a/Database/Export.hs
+++ b/Database/Export.hs
@@ -15,21 +15,21 @@ module Database.Export (
openDb,
closeDb,
flushDbQueue,
- recordDataSource,
- getDataSource,
addExportedLocation,
removeExportedLocation,
getExportedLocation,
isExportDirectoryEmpty,
+ getExportTreeCurrent,
+ recordExportTreeCurrent,
getExportTree,
addExportTree,
removeExportTree,
updateExportTree,
updateExportTree',
ExportedId,
- ExportTreeId,
ExportedDirectoryId,
- DataSourceId,
+ ExportTreeId,
+ ExportTreeCurrentId,
) where
import Database.Types
@@ -50,29 +50,33 @@ import Database.Esqueleto hiding (Key)
newtype ExportHandle = ExportHandle H.DbQueue
share [mkPersist sqlSettings, mkMigrate "migrateExport"] [persistLowerCase|
--- Files that have been exported to the remote.
+-- Files that have been exported to the remote and are present on it.
Exported
key IKey
file SFilePath
ExportedIndex key file
--- The tree that has been exported to the remote.
--- Not all of these files are necessarily present on the remote yet.
-ExportTree
- key IKey
- file SFilePath
- ExportTreeIndex key file
-- Directories that exist on the remote, and the files that are in them.
ExportedDirectory
subdir SFilePath
file SFilePath
ExportedDirectoryIndex subdir file
--- Record of what tree the current database content comes from.
-DataSource
+-- The content of the tree that has been exported to the remote.
+-- Not all of these files are necessarily present on the remote yet.
+ExportTree
+ key IKey
+ file SFilePath
+ ExportTreeIndex key file
+-- The tree stored in ExportTree
+ExportTreeCurrent
tree SRef
UniqueTree tree
|]
-{- Opens the database, creating it if it doesn't exist yet. -}
+{- Opens the database, creating it if it doesn't exist yet.
+ -
+ - Only a single process should write to the export at a time, so guard
+ - any writes with the gitAnnexExportLock.
+ -}
openDb :: UUID -> Annex ExportHandle
openDb u = do
dbdir <- fromRepo (gitAnnexExportDbDir u)
@@ -97,19 +101,19 @@ queueDb (ExportHandle h) = H.queueDb h checkcommit
flushDbQueue :: ExportHandle -> IO ()
flushDbQueue (ExportHandle h) = H.flushDbQueue h
-recordDataSource :: ExportHandle -> Sha -> IO ()
-recordDataSource h s = queueDb h $ do
+recordExportTreeCurrent :: ExportHandle -> Sha -> IO ()
+recordExportTreeCurrent h s = queueDb h $ do
delete $ from $ \r -> do
- where_ (r ^. DataSourceTree ==. r ^. DataSourceTree)
- void $ insertUnique $ DataSource (toSRef s)
+ where_ (r ^. ExportTreeCurrentTree ==. r ^. ExportTreeCurrentTree)
+ void $ insertUnique $ ExportTreeCurrent $ toSRef s
-getDataSource :: ExportHandle -> IO (Maybe Sha)
-getDataSource (ExportHandle h) = H.queryDbQueue h $ do
+getExportTreeCurrent :: ExportHandle -> IO (Maybe Sha)
+getExportTreeCurrent (ExportHandle h) = H.queryDbQueue h $ do
l <- select $ from $ \r -> do
- where_ (r ^. DataSourceTree ==. r ^. DataSourceTree)
- return (r ^. DataSourceTree)
+ where_ (r ^. ExportTreeCurrentTree ==. r ^. ExportTreeCurrentTree)
+ return (r ^. ExportTreeCurrentTree)
case l of
- (s:[]) -> return (Just (fromSRef (unValue s)))
+ (s:[]) -> return $ Just $ fromSRef $ unValue s
_ -> return Nothing
addExportedLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
@@ -167,7 +171,7 @@ getExportTree (ExportHandle h) k = H.queryDbQueue h $ do
addExportTree :: ExportHandle -> Key -> ExportLocation -> IO ()
addExportTree h k loc = queueDb h $
- void $ insertUnique $ Exported ik ef
+ void $ insertUnique $ ExportTree ik ef
where
ik = toIKey k
ef = toSFilePath (fromExportLocation loc)
diff --git a/Remote/Helper/Export.hs b/Remote/Helper/Export.hs
index 9b31baca3..d62c5a7e8 100644
--- a/Remote/Helper/Export.hs
+++ b/Remote/Helper/Export.hs
@@ -12,13 +12,16 @@ module Remote.Helper.Export where
import Annex.Common
import Types.Remote
import Types.Backend
-import Types.Export
import Types.Key
import Backend
import Remote.Helper.Encryptable (isEncrypted)
import Database.Export
+import Logs.Export
+import Annex.LockFile
+import Git.Sha
import qualified Data.Map as M
+import Control.Concurrent.STM
-- | Use for remotes that do not support exports.
class HasExportUnsupported a where
@@ -89,6 +92,33 @@ adjustExportable r = case M.lookup "exporttree" (config r) of
}
isexport = do
db <- openDb (uuid r)
+
+ updateflag <- liftIO newEmptyTMVarIO
+ let updateonce = liftIO $ atomically $
+ ifM (isEmptyTMVar updateflag)
+ ( do
+ putTMVar updateflag ()
+ return True
+ , return False
+ )
+
+ -- Get export locations for a key. Checks once
+ -- if the export log is different than the database and
+ -- updates the database, to notice when an export has been
+ -- updated from another repository.
+ let getexportlocs = \k -> do
+ whenM updateonce $ withExclusiveLock (gitAnnexExportLock (uuid r)) $ do
+ old <- liftIO $ fromMaybe emptyTree
+ <$> getExportTreeCurrent db
+ l <- getExport (uuid r)
+ case map exportedTreeish l of
+ (new:[]) | new /= old -> do
+ updateExportTree db old new
+ liftIO $ recordExportTreeCurrent db new
+ liftIO $ flushDbQueue db
+ _ -> return ()
+ liftIO $ getExportTree db k
+
return $ r
-- Storing a key on an export could be implemented,
-- but it would perform unncessary work
@@ -104,7 +134,7 @@ adjustExportable r = case M.lookup "exporttree" (config r) of
, retrieveKeyFile = \k _af dest p -> unVerified $
if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (keyVariety k))
then do
- locs <- liftIO $ getExportTree db k
+ locs <- getexportlocs k
case locs of
[] -> do
warning "unknown export location"
@@ -135,34 +165,9 @@ adjustExportable r = case M.lookup "exporttree" (config r) of
, checkPresent = \k -> do
ea <- exportActions r
anyM (checkPresentExport ea k)
- =<< liftIO (getExportTree db k)
+ =<< getexportlocs k
, mkUnavailable = return Nothing
, getInfo = do
is <- getInfo r
return (is++[("export", "yes")])
}
-
--- | Remove empty directories from the export. Call after removing an
--- exported file, and after calling removeExportLocation and flushing the
--- database.
-removeEmptyDirectories :: ExportActions Annex -> ExportHandle -> ExportLocation -> [Key] -> Annex Bool
-removeEmptyDirectories ea db loc ks
- | null (exportDirectories loc) = return True
- | otherwise = case removeExportDirectory ea of
- Nothing -> return True
- Just removeexportdirectory -> do
- ok <- allM (go removeexportdirectory)
- (reverse (exportDirectories loc))
- unless ok $ liftIO $ do
- -- Add location back to export database,
- -- so this is tried again next time.
- forM_ ks $ \k ->
- addExportedLocation db k loc
- flushDbQueue db
- return ok
- where
- go removeexportdirectory d =
- ifM (liftIO $ isExportDirectoryEmpty db d)
- ( removeexportdirectory d
- , return True
- )
diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn
index 3ddca0cf8..6c6789a29 100644
--- a/doc/todo/export.mdwn
+++ b/doc/todo/export.mdwn
@@ -17,38 +17,8 @@ there need to be a new interface in supported remotes?
Work is in progress. Todo list:
-* `git annex get --from export` works in the repo that exported to it,
- but in another repo, the export db won't be populated, so it won't work.
- Maybe just show a useful error message in this case?
-
- However, exporting from one repository and then trying to update the
- export from another repository also doesn't work right, because the
- export database is not populated. So, seems that the export database needs
- to get populated based on the export log in these cases.
-
- This needs the db to contain a record of the data source,
- the tree that most recently populated it.
-
- When the export log contains a different tree than the data source,
- the export was updated in another repository, and so the
- export db needs to be updated.
-
- Updating the export db could diff the data source with the
- logged treeish. Add/delete exported files from the database to get
- it to the same state as the remote database.
-
- When an export is incomplete, the database is in some
- state in between the data source tree and the incompletely
- exported tree. Diffing won't resolve this.
-
- When to record the data source? If it's done at the same time the export
- is recorded (as no longer incomplete) in the export log, all the files
- have not yet been uploaded to the export, and the the database is not
- fully updated to match the data source.
-
- Seems that we need a separate table, to be able to look up filenames
- from the export tree by key. That table can be fully populated,
- before the Exported table is.
+* bug: export db update does not reash disk after Remote.Helper.Export calls
+ updateExportTree.
* tracking exports