From e621a2feac9734b90df74df16e6908f249f76304 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 3 Oct 2013 16:57:21 -0400 Subject: watcher: Detect at startup time when there is a stale .git/index.lock, and remove it so it does not interfere with the automatic commits of changed files. --- Assistant/Threads/Watcher.hs | 39 +++++++++++++++++++++++++++-- Utility/Lsof.hs | 4 +-- debian/changelog | 3 +++ doc/design/assistant/disaster_recovery.mdwn | 13 ++++++++-- 4 files changed, 53 insertions(+), 6 deletions(-) diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs index 799537deb..9b9321014 100644 --- a/Assistant/Threads/Watcher.hs +++ b/Assistant/Threads/Watcher.hs @@ -23,7 +23,7 @@ import Assistant.Types.Changes import Assistant.Alert import Utility.DirWatcher import Utility.DirWatcher.Types -import Utility.Lsof +import qualified Utility.Lsof as Lsof import qualified Annex import qualified Annex.Queue import qualified Git @@ -50,7 +50,7 @@ import Data.Time.Clock checkCanWatch :: Annex () checkCanWatch | canWatch = do - liftIO setupLsof + liftIO Lsof.setup unlessM (liftIO (inPath "lsof") <||> Annex.getState Annex.force) needLsof | otherwise = error "watch mode is not available on this system" @@ -122,6 +122,7 @@ waitFor sig next = do {- Initial startup scan. The action should return once the scan is complete. -} startupScan :: IO a -> Assistant a startupScan scanner = do + checkStaleIndexLock liftAnnex $ showAction "scanning" alertWhile' startupScanAlert $ do r <- liftIO scanner @@ -142,6 +143,40 @@ startupScan scanner = do return (True, r) +{- Detect when .git/index.lock exists and has no git process currently + - writing to it. This strongly suggests it is a stale lock file, because + - git writes the new index to index.lock and renames it over top. + - + - However, this could be on a network filesystem. Which is not very safe + - anyway (the assistant relies on being able to check when files have + - no writers to know when to commit them). 
Just in case, when the file + - appears stale, we delay for one minute, and check its size. If the size + - changed, delay for another minute, and so on. + -} +checkStaleIndexLock :: Assistant () +checkStaleIndexLock = do + dir <- liftAnnex $ fromRepo Git.localGitDir + checkStale $ dir </> "index.lock" +checkStale :: FilePath -> Assistant () +checkStale indexlock = go =<< getsize + where + getsize = liftIO $ catchMaybeIO $ fileSize <$> getFileStatus indexlock + go Nothing = return () + go oldsize = ifM (liftIO $ null <$> Lsof.query ["--", indexlock]) + ( do + waitforit "to check stale" + size <- getsize + if size == oldsize + then liftIO $ nukeFile indexlock + else go size + , do + waitforit "for writer on" + go =<< getsize + ) + waitforit why = do + notice ["Waiting for 60 seconds", why, indexlock] + liftIO $ threadDelaySeconds $ Seconds 60 + {- Hardcoded ignores, passed to the DirWatcher so it can avoid looking - at the entire .git directory. Does not include .gitignores. -} ignored :: FilePath -> Bool diff --git a/Utility/Lsof.hs b/Utility/Lsof.hs index 6d6b353f2..63009f723 100644 --- a/Utility/Lsof.hs +++ b/Utility/Lsof.hs @@ -26,8 +26,8 @@ data ProcessInfo = ProcessInfo ProcessID CmdLine {- lsof is not in PATH on all systems, so SysConfig may have the absolute - path where the program was found. Make sure at runtime that lsof is - available, and if it's not in PATH, adjust PATH to contain it. -} -setupLsof :: IO () -setupLsof = do +setup :: IO () +setup = do let cmd = fromMaybe "lsof" SysConfig.lsof when (isAbsolute cmd) $ do path <- getSearchPath diff --git a/debian/changelog b/debian/changelog index 950b5715b..3e51f5677 100644 --- a/debian/changelog +++ b/debian/changelog @@ -3,6 +3,9 @@ git-annex (4.20131003) UNRELEASED; urgency=low * Automatically and safely detect and recover from dangling .git/annex/index.lock files, which would prevent git from committing to the git-annex branch, eg after a crash. 
+ * watcher: Detect at startup time when there is a stale .git/index.lock, + and remove it so it does not interfere with the automatic + commits of changed files. -- Joey Hess Thu, 03 Oct 2013 15:41:24 -0400 diff --git a/doc/design/assistant/disaster_recovery.mdwn b/doc/design/assistant/disaster_recovery.mdwn index 28dd41c5a..e4d0f37f5 100644 --- a/doc/design/assistant/disaster_recovery.mdwn +++ b/doc/design/assistant/disaster_recovery.mdwn @@ -7,8 +7,17 @@ There are a few ways a git repository can get broken that are easily fixed. One is left over index.lock files. When a commit to a repository fails, check that nothing else is using it, fix the problem, and redo the commit. -This should be done on both the current repository and any local -repositories. Maybe also make git-annex-shell be able to do it remotely? +* **done** for .git/annex/index.lock, can be handled safely and automatically. +* **done** for .git/index.lock, only when the assistant is starting up. +* What about local remotes, eg removable drives? git-annex does attempt + to commit to the git-annex branch of those. It will use the automatic + fix if any are dangling. It does not commit to the master branch; indeed + a removable drive typically has a bare repository. So I think nothing to + do here. +* What about git-annex-shell? If the ssh remote has the assistant running, + it can take care of it, and if not, it's a server, and perhaps the user + should be required to fix up if it crashes during a commit. This should + not affect the assistant anyway. ## incremental fsck -- cgit v1.2.3