diff options
-rw-r--r-- | Assistant/Committer.hs | 68 | ||||
-rw-r--r-- | Assistant/Watcher.hs | 42 |
2 files changed, 80 insertions, 30 deletions
diff --git a/Assistant/Committer.hs b/Assistant/Committer.hs index a572556de..6e56c2235 100644 --- a/Assistant/Committer.hs +++ b/Assistant/Committer.hs @@ -9,17 +9,21 @@ import Common.Annex import Assistant.ThreadedMonad import qualified Annex.Queue import qualified Git.Command +import qualified Command.Add import Utility.ThreadScheduler import Control.Concurrent.STM import Data.Time.Clock +data ChangeType = PendingAddChange | LinkChange | RmChange | RmDirChange + deriving (Show, Eq) + type ChangeChan = TChan Change data Change = Change { changeTime :: UTCTime , changeFile :: FilePath - , changeDesc :: String + , changeType :: ChangeType } deriving (Show) @@ -30,11 +34,12 @@ newChangeChan :: IO ChangeChan newChangeChan = atomically newTChan {- Handlers call this when they made a change that needs to get committed. -} -madeChange :: FilePath -> String -> Annex (Maybe Change) -madeChange file desc = do +madeChange :: FilePath -> ChangeType -> Annex (Maybe Change) +madeChange f t = do -- Just in case the commit thread is not flushing the queue fast enough. - Annex.Queue.flushWhenFull - liftIO $ Just <$> (Change <$> getCurrentTime <*> pure file <*> pure desc) + when (t /= PendingAddChange) $ + Annex.Queue.flushWhenFull + liftIO $ Just <$> (Change <$> getCurrentTime <*> pure f <*> pure t) noChange :: Annex (Maybe Change) noChange = return Nothing @@ -66,9 +71,58 @@ commitThread st changechan = runEvery (Seconds 1) $ do -- Now see if now's a good time to commit. time <- getCurrentTime if shouldCommit time cs - then void $ tryIO $ runThreadState st commitStaged + then do + handleAdds st changechan cs + void $ tryIO $ runThreadState st commitStaged else refillChanges changechan cs +{- If there are PendingAddChanges, the files have not yet actually been + - added to the annex, and that has to be done now, before committing. + - + - Deferring the adds to this point causes batches to be bundled together, + - which allows faster checking with lsof that the files are not still open + - for write by some other process. + - + - When a file is added, Inotify will notice the new symlink. So this waits + - for one new LinkChange to be received per file that's successfully + - added, to ensure that its symlink has been staged before returning. + -} +handleAdds :: ThreadState -> ChangeChan -> [Change] -> IO () +handleAdds st changechan cs + | null added = noop + | otherwise = do + numadded <- length . filter id <$> + runThreadState st (forM added add) + waitforlinkchanges numadded + where + added = filter isPendingAdd cs + + isPendingAdd (Change { changeType = PendingAddChange }) = True + isPendingAdd _ = False + isLinkChange (Change { changeType = LinkChange }) = True + isLinkChange _ = False + + add (Change { changeFile = file }) = do + showStart "add" file + handle file =<< Command.Add.ingest file + + handle _ Nothing = do + showEndFail + return False + handle file (Just key) = do + Command.Add.link file key True + showEndOk + return True + + waitforlinkchanges 0 = noop + waitforlinkchanges n = do + c <- runChangeChan $ readTChan changechan + if (isLinkChange c) + then waitforlinkchanges (n-1) + else do + handleAdds st changechan [c] + waitforlinkchanges n + commitStaged :: Annex () commitStaged = do Annex.Queue.flush @@ -87,7 +141,7 @@ commitStaged = do {- Decide if now is a good time to make a commit. - Note that the list of change times has an undefined order. - - - Current strategy: If there have been 10 commits within the past second, + - Current strategy: If there have been 10 changes within the past second, - a batch activity is taking place, so wait for later. -} shouldCommit :: UTCTime -> [Change] -> Bool diff --git a/Assistant/Watcher.hs b/Assistant/Watcher.hs index ee5bc13af..4aac33fd1 100644 --- a/Assistant/Watcher.hs +++ b/Assistant/Watcher.hs @@ -15,7 +15,6 @@ import Assistant.DaemonStatus import Assistant.Committer import Utility.ThreadLock import qualified Annex.Queue -import qualified Command.Add import qualified Git.Command import qualified Git.UpdateIndex import qualified Git.HashObject @@ -87,18 +86,20 @@ runHandler st dstatus changechan handler file filestatus = void $ do where go = runThreadState st $ handler file filestatus dstatus -{- Adding a file is tricky; the file has to be replaced with a symlink - - but this is race prone, as the symlink could be changed immediately - - after creation. To avoid that race, git add is not used to stage the - - symlink. +{- During initial directory scan, this will be run for any regular files + - that are already checked into git. We don't want to turn those into + - symlinks, so do a check. This is rather expensive, but only happens + - during startup. - - - Inotify will notice the new symlink, so this Handler does not stage it - - or return a Change, leaving that to onAddSymlink. + - It's possible for the file to still be open for write by some process. + - This can happen in a few ways; one is if two processes had the file open + - and only one has just closed it. We want to avoid adding a file to the + - annex that is open for write, to avoid anything being able to change it. - - - During initial directory scan, this will be run for any files that - - are already checked into git. We don't want to turn those into symlinks, - - so do a check. This is rather expensive, but only happens during - - startup. + - We could run lsof on the file here to check for other writer. + - But, that's slow. Instead, a Change is returned that indicates this file + - still needs to be added. The committer will handle bundles of these + - Changes at once. -} onAdd :: Handler onAdd file _filestatus dstatus = do @@ -110,14 +111,7 @@ onAdd file _filestatus dstatus = do ) ) where - go = do - showStart "add" file - handle =<< Command.Add.ingest file - noChange - handle Nothing = showEndFail - handle (Just key) = do - Command.Add.link file key True - showEndOk + go = madeChange file PendingAddChange {- A symlink might be an arbitrary symlink, which is just added. - Or, if it is a git-annex symlink, ensure it points to the content @@ -169,13 +163,13 @@ onAddSymlink file filestatus dstatus = go =<< Backend.lookupFile file sha <- inRepo $ Git.HashObject.hashObject BlobObject link stageSymlink file sha - madeChange file "link" + madeChange file LinkChange onDel :: Handler onDel file _ _dstatus = do Annex.Queue.addUpdateIndex =<< inRepo (Git.UpdateIndex.unstageFile file) - madeChange file "rm" + madeChange file RmChange {- A directory has been deleted, or moved, so tell git to remove anything - that was inside it from its cache. Since it could reappear at any time, @@ -188,7 +182,7 @@ onDelDir :: Handler onDelDir dir _ _dstatus = do Annex.Queue.addCommand "rm" [Params "--quiet -r --cached --ignore-unmatch --"] [dir] - madeChange dir "rmdir" + madeChange dir RmDirChange {- Called when there's an error with inotify. -} onErr :: Handler @@ -197,7 +191,9 @@ onErr msg _ _dstatus = do return Nothing {- Adds a symlink to the index, without ever accessing the actual symlink - - on disk. -} + - on disk. This avoids a race if git add is used, where the symlink is + - changed to something else immediately after creation. + -} stageSymlink :: FilePath -> Sha -> Annex () stageSymlink file sha = Annex.Queue.addUpdateIndex =<< |