summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Assistant/Committer.hs68
-rw-r--r--Assistant/Watcher.hs42
2 files changed, 80 insertions, 30 deletions
diff --git a/Assistant/Committer.hs b/Assistant/Committer.hs
index a572556de..6e56c2235 100644
--- a/Assistant/Committer.hs
+++ b/Assistant/Committer.hs
@@ -9,17 +9,21 @@ import Common.Annex
import Assistant.ThreadedMonad
import qualified Annex.Queue
import qualified Git.Command
+import qualified Command.Add
import Utility.ThreadScheduler
import Control.Concurrent.STM
import Data.Time.Clock
+data ChangeType = PendingAddChange | LinkChange | RmChange | RmDirChange
+ deriving (Show, Eq)
+
type ChangeChan = TChan Change
data Change = Change
{ changeTime :: UTCTime
, changeFile :: FilePath
- , changeDesc :: String
+ , changeType :: ChangeType
}
deriving (Show)
@@ -30,11 +34,12 @@ newChangeChan :: IO ChangeChan
newChangeChan = atomically newTChan
{- Handlers call this when they made a change that needs to get committed. -}
-madeChange :: FilePath -> String -> Annex (Maybe Change)
-madeChange file desc = do
+madeChange :: FilePath -> ChangeType -> Annex (Maybe Change)
+madeChange f t = do
-- Just in case the commit thread is not flushing the queue fast enough.
- Annex.Queue.flushWhenFull
- liftIO $ Just <$> (Change <$> getCurrentTime <*> pure file <*> pure desc)
+ when (t /= PendingAddChange) $
+ Annex.Queue.flushWhenFull
+ liftIO $ Just <$> (Change <$> getCurrentTime <*> pure f <*> pure t)
noChange :: Annex (Maybe Change)
noChange = return Nothing
@@ -66,9 +71,58 @@ commitThread st changechan = runEvery (Seconds 1) $ do
-- Now see if now's a good time to commit.
time <- getCurrentTime
if shouldCommit time cs
- then void $ tryIO $ runThreadState st commitStaged
+ then do
+ handleAdds st changechan cs
+ void $ tryIO $ runThreadState st commitStaged
else refillChanges changechan cs
+{- If there are PendingAddChanges, the files have not yet actually been
+ - added to the annex, and that has to be done now, before committing.
+ -
+ - Deferring the adds to this point causes batches to be bundled together,
+ - which allows faster checking with lsof that the files are not still open
+ - for write by some other process.
+ -
+ - When a file is added, Inotify will notice the new symlink. So this waits
+ - for one new LinkChange to be received per file that's successfully
+ - added, to ensure that its symlink has been staged before returning.
+ -}
+handleAdds :: ThreadState -> ChangeChan -> [Change] -> IO ()
+handleAdds st changechan cs
+ | null added = noop
+ | otherwise = do
+ numadded <- length . filter id <$>
+ runThreadState st (forM added add)
+ waitforlinkchanges numadded
+ where
+ added = filter isPendingAdd cs
+
+ isPendingAdd (Change { changeType = PendingAddChange }) = True
+ isPendingAdd _ = False
+ isLinkChange (Change { changeType = LinkChange }) = True
+ isLinkChange _ = False
+
+ add (Change { changeFile = file }) = do
+ showStart "add" file
+ handle file =<< Command.Add.ingest file
+
+ handle _ Nothing = do
+ showEndFail
+ return False
+ handle file (Just key) = do
+ Command.Add.link file key True
+ showEndOk
+ return True
+
+ waitforlinkchanges 0 = noop
+ waitforlinkchanges n = do
+ c <- runChangeChan $ readTChan changechan
+ if (isLinkChange c)
+ then waitforlinkchanges (n-1)
+ else do
+ handleAdds st changechan [c]
+ waitforlinkchanges n
+
commitStaged :: Annex ()
commitStaged = do
Annex.Queue.flush
@@ -87,7 +141,7 @@ commitStaged = do
{- Decide if now is a good time to make a commit.
- Note that the list of change times has an undefined order.
-
- - Current strategy: If there have been 10 commits within the past second,
+ - Current strategy: If there have been 10 changes within the past second,
- a batch activity is taking place, so wait for later.
-}
shouldCommit :: UTCTime -> [Change] -> Bool
diff --git a/Assistant/Watcher.hs b/Assistant/Watcher.hs
index ee5bc13af..4aac33fd1 100644
--- a/Assistant/Watcher.hs
+++ b/Assistant/Watcher.hs
@@ -15,7 +15,6 @@ import Assistant.DaemonStatus
import Assistant.Committer
import Utility.ThreadLock
import qualified Annex.Queue
-import qualified Command.Add
import qualified Git.Command
import qualified Git.UpdateIndex
import qualified Git.HashObject
@@ -87,18 +86,20 @@ runHandler st dstatus changechan handler file filestatus = void $ do
where
go = runThreadState st $ handler file filestatus dstatus
-{- Adding a file is tricky; the file has to be replaced with a symlink
- - but this is race prone, as the symlink could be changed immediately
- - after creation. To avoid that race, git add is not used to stage the
- - symlink.
+{- During initial directory scan, this will be run for any regular files
+ - that are already checked into git. We don't want to turn those into
+ - symlinks, so do a check. This is rather expensive, but only happens
+ - during startup.
-
- - Inotify will notice the new symlink, so this Handler does not stage it
- - or return a Change, leaving that to onAddSymlink.
+ - It's possible for the file to still be open for write by some process.
+ - This can happen in a few ways; one is if two processes had the file open
+ - and only one has just closed it. We want to avoid adding a file to the
+ - annex that is open for write, to avoid anything being able to change it.
-
- - During initial directory scan, this will be run for any files that
- - are already checked into git. We don't want to turn those into symlinks,
- - so do a check. This is rather expensive, but only happens during
- - startup.
+ - We could run lsof on the file here to check for other writer.
+ - But, that's slow. Instead, a Change is returned that indicates this file
+ - still needs to be added. The committer will handle bundles of these
+ - Changes at once.
-}
onAdd :: Handler
onAdd file _filestatus dstatus = do
@@ -110,14 +111,7 @@ onAdd file _filestatus dstatus = do
)
)
where
- go = do
- showStart "add" file
- handle =<< Command.Add.ingest file
- noChange
- handle Nothing = showEndFail
- handle (Just key) = do
- Command.Add.link file key True
- showEndOk
+ go = madeChange file PendingAddChange
{- A symlink might be an arbitrary symlink, which is just added.
- Or, if it is a git-annex symlink, ensure it points to the content
@@ -169,13 +163,13 @@ onAddSymlink file filestatus dstatus = go =<< Backend.lookupFile file
sha <- inRepo $
Git.HashObject.hashObject BlobObject link
stageSymlink file sha
- madeChange file "link"
+ madeChange file LinkChange
onDel :: Handler
onDel file _ _dstatus = do
Annex.Queue.addUpdateIndex =<<
inRepo (Git.UpdateIndex.unstageFile file)
- madeChange file "rm"
+ madeChange file RmChange
{- A directory has been deleted, or moved, so tell git to remove anything
- that was inside it from its cache. Since it could reappear at any time,
@@ -188,7 +182,7 @@ onDelDir :: Handler
onDelDir dir _ _dstatus = do
Annex.Queue.addCommand "rm"
[Params "--quiet -r --cached --ignore-unmatch --"] [dir]
- madeChange dir "rmdir"
+ madeChange dir RmDirChange
{- Called when there's an error with inotify. -}
onErr :: Handler
@@ -197,7 +191,9 @@ onErr msg _ _dstatus = do
return Nothing
{- Adds a symlink to the index, without ever accessing the actual symlink
- - on disk. -}
+ - on disk. This avoids a race if git add is used, where the symlink is
+ - changed to something else immediately after creation.
+ -}
stageSymlink :: FilePath -> Sha -> Annex ()
stageSymlink file sha =
Annex.Queue.addUpdateIndex =<<