summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Assistant.hs4
-rw-r--r--Assistant/RepoProblem.hs27
-rw-r--r--Assistant/Sync.hs2
-rw-r--r--Assistant/Threads/ProblemChecker.hs55
-rw-r--r--Assistant/Threads/ProblemFixer.hs70
-rw-r--r--Assistant/Threads/SanityChecker.hs2
-rw-r--r--Assistant/Types/RepoProblem.hs14
-rw-r--r--doc/design/assistant/disaster_recovery.mdwn5
8 files changed, 109 insertions, 70 deletions
diff --git a/Assistant.hs b/Assistant.hs
index aa1399c03..c7ca98ee9 100644
--- a/Assistant.hs
+++ b/Assistant.hs
@@ -23,7 +23,7 @@ import Assistant.Threads.TransferWatcher
import Assistant.Threads.Transferrer
import Assistant.Threads.SanityChecker
import Assistant.Threads.Cronner
-import Assistant.Threads.ProblemChecker
+import Assistant.Threads.ProblemFixer
#ifdef WITH_CLIBS
import Assistant.Threads.MountWatcher
#endif
@@ -130,7 +130,7 @@ startDaemon assistant foreground startdelay listenhost startbrowser = do
, assist $ daemonStatusThread
, assist $ sanityCheckerDailyThread
, assist $ sanityCheckerHourlyThread
- , assist $ problemCheckerThread urlrenderer
+ , assist $ problemFixerThread urlrenderer
#ifdef WITH_CLIBS
, assist $ mountWatcherThread
#endif
diff --git a/Assistant/RepoProblem.hs b/Assistant/RepoProblem.hs
index d2e5a5cf1..6913fefc6 100644
--- a/Assistant/RepoProblem.hs
+++ b/Assistant/RepoProblem.hs
@@ -8,16 +8,27 @@
module Assistant.RepoProblem where
import Assistant.Common
+import Assistant.Types.RepoProblem
import Utility.TList
import Control.Concurrent.STM
-{- Gets all repositories that have problems.
- - Blocks until there is at least one. -}
-getRepoProblems :: Assistant [UUID]
-getRepoProblems = (atomically . getTList) <<~ repoProblemChan
+{- Gets all repositories that have problems. Blocks until there is at
+ - least one. -}
+getRepoProblems :: Assistant [RepoProblem]
+getRepoProblems = nubBy sameRepoProblem
+ <$> (atomically . getTList) <<~ repoProblemChan
-{- Indicates that there was a problem accessing a repo, and the problem
- - appears to not be a transient (eg network connection) problem. -}
-repoHasProblem :: UUID -> Assistant ()
-repoHasProblem r = (atomically . flip consTList r) <<~ repoProblemChan
+{- Indicates that there was a problem with a repository, and the problem
+ - appears to not be a transient (eg network connection) problem.
+ -
+ - If the problem is able to be repaired, the passed action will be run.
+ - (However, if multiple problems are reported with a single repository,
+ - only a single action will be run.)
+ -}
+repoHasProblem :: UUID -> Assistant () -> Assistant ()
+repoHasProblem u afterrepair = do
+ rp <- RepoProblem
+ <$> pure u
+ <*> asIO afterrepair
+ (atomically . flip consTList rp) <<~ repoProblemChan
diff --git a/Assistant/Sync.hs b/Assistant/Sync.hs
index 4a021df2e..f7656f52d 100644
--- a/Assistant/Sync.hs
+++ b/Assistant/Sync.hs
@@ -67,7 +67,7 @@ reconnectRemotes notifypushes rs = void $ do
failedrs <- syncAction rs' (const go)
forM_ failedrs $ \r ->
whenM (liftIO $ Remote.checkAvailable False r) $
- repoHasProblem (Remote.uuid r)
+ repoHasProblem (Remote.uuid r) (syncRemote r)
mapM_ signal $ filter (`notElem` failedrs) rs'
where
gitremotes = filter (notspecialremote . Remote.repo) rs
diff --git a/Assistant/Threads/ProblemChecker.hs b/Assistant/Threads/ProblemChecker.hs
deleted file mode 100644
index 66dcadfff..000000000
--- a/Assistant/Threads/ProblemChecker.hs
+++ /dev/null
@@ -1,55 +0,0 @@
-{- git-annex assistant thread to handle reported problems with repositories
- -
- - Copyright 2013 Joey Hess <joey@kitenet.net>
- -
- - Licensed under the GNU GPL version 3 or higher.
- -}
-
-module Assistant.Threads.ProblemChecker (
- problemCheckerThread
-) where
-
-import Assistant.Common
-import Utility.ThreadScheduler
-import Assistant.Types.UrlRenderer
-import Assistant.Alert
-import Remote
-import qualified Types.Remote as Remote
-import qualified Git.Fsck
-import Assistant.Repair
-import qualified Git
-import Assistant.RepoProblem
-import Assistant.Sync
-import Annex.UUID
-
-{- Waits for problems with a repo, and tries to fsck the repo and repair
- - the problem. -}
-problemCheckerThread :: UrlRenderer -> NamedThread
-problemCheckerThread urlrenderer = namedThread "ProblemChecker" $ forever $ do
- mapM_ (handleProblem urlrenderer)
- =<< nub <$> getRepoProblems
- liftIO $ threadDelaySeconds (Seconds 60)
-
-handleProblem :: UrlRenderer -> UUID -> Assistant ()
-handleProblem urlrenderer u = ifM ((==) u <$> liftAnnex getUUID)
- ( handleLocalRepoProblem urlrenderer
- , maybe noop (handleRemoteProblem urlrenderer)
- =<< liftAnnex (remoteFromUUID u)
- )
-
-handleRemoteProblem :: UrlRenderer -> Remote -> Assistant ()
-handleRemoteProblem urlrenderer rmt
- | Git.repoIsLocal r && not (Git.repoIsLocalUnknown r) =
- whenM (liftIO $ checkAvailable True rmt) $ do
- repairStaleGitLocks r
- fsckresults <- showFscking urlrenderer (Just $ Remote.name rmt) $ tryNonAsync $
- Git.Fsck.findBroken True r
- whenM (repairWhenNecessary urlrenderer (Remote.uuid rmt) (Just rmt) fsckresults) $
- syncRemote rmt
- | otherwise = noop
- where
- r = Remote.repo rmt
-
-handleLocalRepoProblem :: UrlRenderer -> Assistant ()
-handleLocalRepoProblem urlrenderer = do
- repairStaleGitLocks =<< liftAnnex gitRepo
diff --git a/Assistant/Threads/ProblemFixer.hs b/Assistant/Threads/ProblemFixer.hs
new file mode 100644
index 000000000..f9774e0f0
--- /dev/null
+++ b/Assistant/Threads/ProblemFixer.hs
@@ -0,0 +1,70 @@
+{- git-annex assistant thread to handle fixing problems with repositories
+ -
+ - Copyright 2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Assistant.Threads.ProblemFixer (
+ problemFixerThread
+) where
+
+import Assistant.Common
+import Assistant.Types.RepoProblem
+import Assistant.RepoProblem
+import Assistant.Types.UrlRenderer
+import Assistant.Alert
+import Remote
+import qualified Types.Remote as Remote
+import qualified Git.Fsck
+import Assistant.Repair
+import qualified Git
+import Annex.UUID
+import Utility.ThreadScheduler
+
+{- Waits for problems with a repo, and tries to fsck the repo and repair
+ - the problem. -}
+problemFixerThread :: UrlRenderer -> NamedThread
+problemFixerThread urlrenderer = namedThread "ProblemFixer" $
+ go =<< getRepoProblems
+ where
+ go problems = do
+ mapM_ (handleProblem urlrenderer) problems
+ liftIO $ threadDelaySeconds (Seconds 60)
+ -- Problems may have been re-reported while they were being
+ -- fixed, so ignore those. If a new unique problem happened
+ -- 60 seconds after the last was fixed, we're unlikely
+ -- to do much good anyway.
+ go =<< filter (\p -> not (any (sameRepoProblem p) problems))
+ <$> getRepoProblems
+
+handleProblem :: UrlRenderer -> RepoProblem -> Assistant ()
+handleProblem urlrenderer repoproblem = do
+ fixed <- ifM ((==) (problemUUID repoproblem) <$> liftAnnex getUUID)
+ ( handleLocalRepoProblem urlrenderer
+ , maybe (return False) (handleRemoteProblem urlrenderer)
+ =<< liftAnnex (remoteFromUUID $ problemUUID repoproblem)
+ )
+ when fixed $
+ liftIO $ afterFix repoproblem
+
+handleRemoteProblem :: UrlRenderer -> Remote -> Assistant Bool
+handleRemoteProblem urlrenderer rmt
+ | Git.repoIsLocal r && not (Git.repoIsLocalUnknown r) =
+ ifM (liftIO $ checkAvailable True rmt)
+ ( do
+ fixedlocks <- repairStaleGitLocks r
+ fsckresults <- showFscking urlrenderer (Just $ Remote.name rmt) $ tryNonAsync $
+ Git.Fsck.findBroken True r
+ repaired <- repairWhenNecessary urlrenderer (Remote.uuid rmt) (Just rmt) fsckresults
+ return $ fixedlocks || repaired
+ , return False
+ )
+ | otherwise = return False
+ where
+ r = Remote.repo rmt
+
+{- This is not yet used, and should probably do a fsck. -}
+handleLocalRepoProblem :: UrlRenderer -> Assistant Bool
+handleLocalRepoProblem urlrenderer = do
+ repairStaleGitLocks =<< liftAnnex gitRepo
diff --git a/Assistant/Threads/SanityChecker.hs b/Assistant/Threads/SanityChecker.hs
index 0dd047fc9..4f5eeda50 100644
--- a/Assistant/Threads/SanityChecker.hs
+++ b/Assistant/Threads/SanityChecker.hs
@@ -33,7 +33,7 @@ import Data.Time.Clock.POSIX
- being nonresponsive.) -}
sanityCheckerStartupThread :: Maybe Duration -> NamedThread
sanityCheckerStartupThread startupdelay = namedThreadUnchecked "SanityCheckerStartup" $ do
- repairStaleGitLocks =<< liftAnnex gitRepo
+ void $ repairStaleGitLocks =<< liftAnnex gitRepo
liftIO $ maybe noop (threadDelaySeconds . Seconds . fromIntegral . durationSeconds) startupdelay
diff --git a/Assistant/Types/RepoProblem.hs b/Assistant/Types/RepoProblem.hs
index 40397c708..ece5a5286 100644
--- a/Assistant/Types/RepoProblem.hs
+++ b/Assistant/Types/RepoProblem.hs
@@ -1,4 +1,4 @@
-{- git-annex assistant remote problem detection
+{- git-annex assistant repository problem tracking
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
@@ -11,8 +11,18 @@ import Types
import Utility.TList
import Control.Concurrent.STM
+import Data.Function
-type RepoProblemChan = TList UUID
+data RepoProblem = RepoProblem
+ { problemUUID :: UUID
+ , afterFix :: IO ()
+ }
+
+{- The afterFix actions are assumed to all be equivilant. -}
+sameRepoProblem :: RepoProblem -> RepoProblem -> Bool
+sameRepoProblem = (==) `on` problemUUID
+
+type RepoProblemChan = TList RepoProblem
newRepoProblemChan :: IO RepoProblemChan
newRepoProblemChan = atomically newTList
diff --git a/doc/design/assistant/disaster_recovery.mdwn b/doc/design/assistant/disaster_recovery.mdwn
index 4f72d96c3..40e48650e 100644
--- a/doc/design/assistant/disaster_recovery.mdwn
+++ b/doc/design/assistant/disaster_recovery.mdwn
@@ -62,7 +62,10 @@ Add git fsck to scheduled self fsck **done**
TODO: git fsck on ssh remotes? Probably not worth the complexity..
TODO: If committing to the repository fails, after resolving any dangling
-lock files (see above), it should git fsck.
+lock files (see above), it should git fsck. This is difficult, because
+git commit will also fail if the commit turns out to be empty, or due to
+other transient problems.. So commit failures are currently ignored by the
+assistant.
If git fsck finds problems, launch git repository repair. **done**