From c18aca6a6a1673d17467e25eb5c900f824c8231c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 17 Feb 2015 13:04:22 -0400 Subject: avoid crash when starting fsck --incremental when one is already running Turns out sqlite does not like having its database deleted out from underneath it. It might suffice to empty the table, but I would rather start each fsck over with a new database, so I added a lock file, and running incremental fscks use a shared lock. This leaves one concurrency bug left; running two concurrent fsck --more will lead to: "SQLite3 returned ErrorBusy while attempting to perform step." and one or both will fail. This is a concurrent writers problem. --- Annex/LockFile.hs | 21 ++++++++++++++++++++- Command/Fsck.hs | 8 +++++--- Database/Fsck.hs | 24 ++++++++++++++++++++---- Locations.hs | 5 +++++ 4 files changed, 50 insertions(+), 8 deletions(-) diff --git a/Annex/LockFile.hs b/Annex/LockFile.hs index 18e876c75..62a101aa5 100644 --- a/Annex/LockFile.hs +++ b/Annex/LockFile.hs @@ -1,6 +1,6 @@ {- git-annex lock files. - - - Copyright 2012, 2014 Joey Hess + - Copyright 2012-2015 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -12,6 +12,7 @@ module Annex.LockFile ( unlockFile, getLockPool, withExclusiveLock, + tryExclusiveLock, ) where import Common.Annex @@ -70,3 +71,21 @@ withExclusiveLock getlockfile a = do #else lock _mode = waitToLock . lockExclusive #endif + +{- Tries to take an exclusive lock and run an action. If the lock is + - already held, returns Nothing. -} +tryExclusiveLock :: (Git.Repo -> FilePath) -> Annex a -> Annex (Maybe a) +tryExclusiveLock getlockfile a = do + lockfile <- fromRepo getlockfile + createAnnexDirectory $ takeDirectory lockfile + mode <- annexFileMode + bracketIO (lock mode lockfile) unlock go + where +#ifndef mingw32_HOST_OS + lock mode = noUmask mode . tryLockExclusive (Just mode) +#else + lock _mode = lockExclusive +#endif + unlock = maybe noop dropLock + go Nothing = return Nothing + go (Just _) = Just <$> a diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 9dba21dbf..799396a10 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -72,7 +72,7 @@ seek ps = do (\k -> startKey i k =<< getNumCopies) (withFilesInGit $ whenAnnexed $ start from i) ps - withFsckDb i (liftIO . FsckDb.closeDb) + withFsckDb i FsckDb.closeDb getIncremental :: Annex Incremental getIncremental = do @@ -91,8 +91,10 @@ getIncremental = do where startIncremental = do recordStartTime - FsckDb.newPass - StartIncremental <$> FsckDb.openDb + ifM FsckDb.newPass + ( StartIncremental <$> FsckDb.openDb + , error "Cannot start a new --incremental fsck pass; another fsck process is already running." + ) contIncremental = ContIncremental <$> FsckDb.openDb checkschedule Nothing = error "bad --incremental-schedule value" diff --git a/Database/Fsck.hs b/Database/Fsck.hs index a137cba8e..a42988205 100644 --- a/Database/Fsck.hs +++ b/Database/Fsck.hs @@ -11,8 +11,8 @@ module Database.Fsck ( newPass, openDb, + closeDb, H.commitDb, - H.closeDb, H.DbHandle, addDb, inDb, @@ -26,6 +26,7 @@ import Utility.Directory import Annex import Types.Key import Annex.Perms +import Annex.LockFile import Database.Persist.TH import Database.Esqueleto hiding (Key) @@ -33,6 +34,8 @@ import Control.Monad import Control.Monad.IfElse import Control.Monad.IO.Class (liftIO) import System.Directory +import Data.Maybe +import Control.Applicative {- Each key stored in the database has already been fscked as part - of the latest incremental fsck pass. -} @@ -42,9 +45,16 @@ Fscked UniqueKey key |] -{- The database is removed when starting a new incremental fsck pass. -} -newPass :: Annex () -newPass = liftIO. nukeFile =<< fromRepo gitAnnexFsckDb +{- The database is removed when starting a new incremental fsck pass. + - + - This may fail, if other fsck processes are currently running using the + - database. Removing the database in that situation would lead to crashes + - or undefined behavior. + -} +newPass :: Annex Bool +newPass = isJust <$> tryExclusiveLock gitAnnexFsckDbLock go + where + go = liftIO. nukeFile =<< fromRepo gitAnnexFsckDb {- Opens the database, creating it atomically if it doesn't exist yet. -} openDb :: Annex H.DbHandle @@ -58,8 +68,14 @@ openDb = do liftIO $ H.closeDb h setAnnexFilePerm newdb liftIO $ renameFile newdb db + lockFileShared =<< fromRepo gitAnnexFsckDbLock liftIO $ H.openDb db +closeDb :: H.DbHandle -> Annex () +closeDb h = do + liftIO $ H.closeDb h + unlockFile =<< fromRepo gitAnnexFsckDbLock + addDb :: H.DbHandle -> Key -> IO () addDb h = void . H.runDb' h commitPolicy . insert . Fscked . toSKey diff --git a/Locations.hs b/Locations.hs index 0cd57aa98..94c03cb91 100644 --- a/Locations.hs +++ b/Locations.hs @@ -58,6 +58,7 @@ module Locations ( gitAnnexRemotesDir, gitAnnexAssistantDefaultDir, gitAnnexFsckDb, + gitAnnexFsckDbLock, isLinkToAnnex, HashLevels(..), hashDirMixed, @@ -345,6 +346,10 @@ gitAnnexAssistantDefaultDir = "annex" gitAnnexFsckDb :: Git.Repo -> FilePath gitAnnexFsckDb r = gitAnnexDir r "fsck.db" +{- Lock file for the fsck database. -} +gitAnnexFsckDbLock :: Git.Repo -> FilePath +gitAnnexFsckDbLock r = gitAnnexDir r "fsck.dbl" + {- Checks a symlink target to see if it appears to point to annexed content. - - We only look at paths inside the .git directory, and not at the .git -- cgit v1.2.3