diff options
-rw-r--r-- | Annex/Action.hs | 35 | ||||
-rw-r--r-- | CmdLine.hs | 25 | ||||
-rw-r--r-- | Command/Fsck.hs | 52 | ||||
-rw-r--r-- | Command/RecvKey.hs | 2 | ||||
-rw-r--r-- | Database/Fsck.hs | 10 | ||||
-rw-r--r-- | Database/Handle.hs | 46 | ||||
-rw-r--r-- | Limit.hs | 2 | ||||
-rw-r--r-- | Remote/Rsync.hs | 2 | ||||
-rw-r--r-- | debian/changelog | 14 | ||||
-rw-r--r-- | doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_.mdwn | 11 | ||||
-rw-r--r-- | doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_/comment_3_4afd7d73952c7f9172c80f20a5047625._comment | 9 | ||||
-rw-r--r-- | doc/devblog/day_307__two_release_week.mdwn | 11 | ||||
-rw-r--r-- | doc/git-annex-fsck.mdwn | 7 |
13 files changed, 158 insertions, 68 deletions
diff --git a/Annex/Action.hs b/Annex/Action.hs new file mode 100644 index 000000000..f59c9c2f4 --- /dev/null +++ b/Annex/Action.hs @@ -0,0 +1,35 @@ +{- git-annex actions + - + - Copyright 2010-2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Action where + +import qualified Data.Map as M +#ifndef mingw32_HOST_OS +import System.Posix.Signals +#endif + +import Common.Annex +import qualified Annex +import Annex.Content + +{- Actions to perform each time ran. -} +startup :: Annex () +startup = +#ifndef mingw32_HOST_OS + liftIO $ void $ installHandler sigINT Default Nothing +#else + return () +#endif + +{- Cleanup actions. -} +shutdown :: Bool -> Annex () +shutdown nocommit = do + saveState nocommit + sequence_ =<< M.elems <$> Annex.getState Annex.cleanup + liftIO reapZombies -- zombies from long-running git processes diff --git a/CmdLine.hs b/CmdLine.hs index 492a3b75f..a512d868d 100644 --- a/CmdLine.hs +++ b/CmdLine.hs @@ -5,29 +5,22 @@ - Licensed under the GNU GPL version 3 or higher. -} -{-# LANGUAGE CPP #-} - module CmdLine ( dispatch, usage, - shutdown ) where import qualified Options.Applicative as O import qualified Options.Applicative.Help as H import qualified Control.Exception as E -import qualified Data.Map as M import Control.Exception (throw) -#ifndef mingw32_HOST_OS -import System.Posix.Signals -#endif import Common.Annex import qualified Annex import qualified Git import qualified Git.AutoCorrect import qualified Git.Config -import Annex.Content +import Annex.Action import Annex.Environment import Command import Types.Messages @@ -117,19 +110,3 @@ findCmd fuzzyok argv cmds inexactcmds = case name of Nothing -> [] Just n -> Git.AutoCorrect.fuzzymatches n cmdname cmds - -{- Actions to perform each time ran. -} -startup :: Annex () -startup = -#ifndef mingw32_HOST_OS - liftIO $ void $ installHandler sigINT Default Nothing -#else - return () -#endif - -{- Cleanup actions. -} -shutdown :: Bool -> Annex () -shutdown nocommit = do - saveState nocommit - sequence_ =<< M.elems <$> Annex.getState Annex.cleanup - liftIO reapZombies -- zombies from long-running git processes diff --git a/Command/Fsck.hs b/Command/Fsck.hs index a201c4519..c3b73edb5 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -457,18 +457,12 @@ runFsck inc file key a = ifM (needFsck inc key) {- Check if a key needs to be fscked, with support for incremental fscks. -} needFsck :: Incremental -> Key -> Annex Bool +needFsck (ScheduleIncremental _ _ i) k = needFsck i k #ifdef WITH_DATABASE needFsck (ContIncremental h) key = liftIO $ not <$> FsckDb.inDb h key #endif needFsck _ _ = return True -#ifdef WITH_DATABASE -withFsckDb :: Incremental -> (FsckDb.FsckHandle -> Annex ()) -> Annex () -withFsckDb (ContIncremental h) a = a h -withFsckDb (StartIncremental h) a = a h -withFsckDb NonIncremental _ = noop -#endif - recordFsckTime :: Incremental -> Key -> Annex () #ifdef WITH_DATABASE recordFsckTime inc key = withFsckDb inc $ \h -> liftIO $ FsckDb.addDb h key @@ -527,7 +521,8 @@ data Incremental = NonIncremental #ifdef WITH_DATABASE | StartIncremental FsckDb.FsckHandle - | ContIncremental FsckDb.FsckHandle + | ContIncremental FsckDb.FsckHandle + | ScheduleIncremental Duration UUID Incremental #endif prepIncremental :: UUID -> Maybe IncrementalOpt -> Annex Incremental @@ -536,31 +531,44 @@ prepIncremental _ Nothing = pure NonIncremental prepIncremental u (Just StartIncrementalO) = do recordStartTime u ifM (FsckDb.newPass u) - ( StartIncremental <$> FsckDb.openDb u + ( StartIncremental <$> openFsckDb u , error "Cannot start a new --incremental fsck pass; another fsck process is already running." ) prepIncremental u (Just MoreIncrementalO) = - ContIncremental <$> FsckDb.openDb u + ContIncremental <$> openFsckDb u prepIncremental u (Just (ScheduleIncrementalO delta)) = do - Annex.addCleanup FsckCleanup $ do - v <- getStartTime u - case v of - Nothing -> noop - Just started -> do - now <- liftIO getPOSIXTime - when (now - realToFrac started >= durationToPOSIXTime delta) $ - resetStartTime u started <- getStartTime u - prepIncremental u $ Just $ case started of + i <- prepIncremental u $ Just $ case started of Nothing -> StartIncrementalO Just _ -> MoreIncrementalO + return (ScheduleIncremental delta u i) #else prepIncremental _ _ = error "This git-annex was not built with database support; incremental fsck not supported" #endif cleanupIncremental :: Incremental -> Annex () -#ifdef WITH_DATABASE -cleanupIncremental i = withFsckDb i FsckDb.closeDb -#else +cleanupIncremental (ScheduleIncremental delta u i) = do + v <- getStartTime u + case v of + Nothing -> noop + Just started -> do + now <- liftIO getPOSIXTime + when (now - realToFrac started >= durationToPOSIXTime delta) $ + resetStartTime u + cleanupIncremental i cleanupIncremental _ = return () + +#ifdef WITH_DATABASE +openFsckDb :: UUID -> Annex FsckDb.FsckHandle +openFsckDb u = do + h <- FsckDb.openDb u + Annex.addCleanup FsckCleanup $ + FsckDb.closeDb h + return h + +withFsckDb :: Incremental -> (FsckDb.FsckHandle -> Annex ()) -> Annex () +withFsckDb (ContIncremental h) a = a h +withFsckDb (StartIncremental h) a = a h +withFsckDb NonIncremental _ = noop +withFsckDb (ScheduleIncremental _ _ i) a = withFsckDb i a #endif diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs index a49efce2f..cb24dbb47 100644 --- a/Command/RecvKey.hs +++ b/Command/RecvKey.hs @@ -9,8 +9,8 @@ module Command.RecvKey where import Common.Annex import Command -import CmdLine import Annex.Content +import Annex.Action import Annex import Utility.Rsync import Logs.Transfer diff --git a/Database/Fsck.hs b/Database/Fsck.hs index d9416927b..20b4878e3 100644 --- a/Database/Fsck.hs +++ b/Database/Fsck.hs @@ -34,6 +34,7 @@ import Annex.LockFile import Database.Persist.TH import Database.Esqueleto hiding (Key) +import Data.Time.Clock data FsckHandle = FsckHandle H.DbHandle UUID @@ -84,11 +85,18 @@ closeDb (FsckHandle h u) = do unlockFile =<< fromRepo (gitAnnexFsckDbLock u) addDb :: FsckHandle -> Key -> IO () -addDb (FsckHandle h _) k = H.queueDb h 1000 $ +addDb (FsckHandle h _) k = H.queueDb h checkcommit $ void $ insertUnique $ Fscked sk where sk = toSKey k + -- commit queue after 1000 files or 5 minutes, whichever comes first + checkcommit sz lastcommittime + | sz > 1000 = return True + | otherwise = do + now <- getCurrentTime + return $ diffUTCTime lastcommittime now > 300 + inDb :: FsckHandle -> Key -> IO Bool inDb (FsckHandle h _) = H.queryDb h . inDb' . toSKey diff --git a/Database/Handle.hs b/Database/Handle.hs index dc3363e48..1fd9f7834 100644 --- a/Database/Handle.hs +++ b/Database/Handle.hs @@ -20,6 +20,7 @@ module Database.Handle ( ) where import Utility.Exception +import Utility.Monad import Messages import Database.Persist.Sqlite @@ -33,6 +34,7 @@ import qualified Data.Text as T import Control.Monad.Trans.Resource (runResourceT) import Control.Monad.Logger (runNoLoggingT) import Data.List +import Data.Time.Clock {- A DbHandle is a reference to a worker thread that communicates with - the database. It has a MVar which Jobs are submitted to. -} @@ -64,7 +66,7 @@ openDb :: FilePath -> TableName -> IO DbHandle openDb db tablename = do jobs <- newEmptyMVar worker <- async (workerThread (T.pack db) tablename jobs) - q <- newMVar emptyDbQueue + q <- newMVar =<< emptyDbQueue return $ DbHandle worker jobs q data Job @@ -145,16 +147,19 @@ closeDb h@(DbHandle worker jobs _) = do type Size = Int +type LastCommitTime = UTCTime + {- A queue of actions to perform, with a count of the number of actions - - queued. -} -data DbQueue = DbQueue Size (SqlPersistM ()) + - queued, and a last commit time. -} +data DbQueue = DbQueue Size LastCommitTime (SqlPersistM ()) -emptyDbQueue :: DbQueue -emptyDbQueue = DbQueue 0 (return ()) +emptyDbQueue :: IO DbQueue +emptyDbQueue = do + now <- getCurrentTime + return $ DbQueue 0 now (return ()) {- Queues a change to be made to the database. It will be buffered - - to be committed later, unless the queue gets larger than the specified - - size. + - to be committed later, unless the commitchecker action returns true. - - (Be sure to call closeDb or flushQueueDb to ensure the change - gets committed.) @@ -164,25 +169,32 @@ emptyDbQueue = DbQueue 0 (return ()) - process, the transaction is put back in the queue. This solves - the sqlite multiple writer problem. -} -queueDb :: DbHandle -> Size -> SqlPersistM () -> IO () -queueDb h@(DbHandle _ _ qvar) maxsz a = do - DbQueue sz qa <- takeMVar qvar +queueDb + :: DbHandle + -> (Size -> LastCommitTime -> IO Bool) + -> SqlPersistM () + -> IO () +queueDb h@(DbHandle _ _ qvar) commitchecker a = do + DbQueue sz lastcommittime qa <- takeMVar qvar let !sz' = sz + 1 let qa' = qa >> a - let enqueue newsz = putMVar qvar (DbQueue newsz qa') - if sz' > maxsz - then do + let enqueue = putMVar qvar + ifM (commitchecker sz' lastcommittime) + ( do r <- commitDb h qa' case r of - Left _ -> enqueue 0 - Right _ -> putMVar qvar emptyDbQueue - else enqueue sz' + Left _ -> enqueue $ DbQueue sz' lastcommittime qa' + Right _ -> do + now <- getCurrentTime + enqueue $ DbQueue 0 now (return ()) + , enqueue $ DbQueue sz' lastcommittime qa' + ) {- If flushing the queue fails, this could be because there is another - writer to the database. Retry repeatedly for up to 10 seconds. -} flushQueueDb :: DbHandle -> IO () flushQueueDb h@(DbHandle _ _ qvar) = do - DbQueue sz qa <- takeMVar qvar + DbQueue sz _ qa <- takeMVar qvar when (sz > 0) $ robustly Nothing 100 (commitDb h qa) where @@ -13,6 +13,7 @@ import qualified Utility.Matcher import qualified Remote import qualified Backend import Annex.Content +import Annex.Action import Annex.UUID import Logs.Trust import Annex.NumCopies @@ -271,6 +272,7 @@ addTimeLimit s = do if now > cutoff then do warning $ "Time limit (" ++ s ++ ") reached!" + shutdown True liftIO $ exitWith $ ExitFailure 101 else return True diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index b9cdc458e..c610938a9 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -33,8 +33,6 @@ import Utility.Rsync import Utility.CopyFile import Messages.Progress import Utility.Metered -import Utility.PID -import Annex.Perms import Logs.Transfer import Types.Creds import Types.Key (isChunkKey) diff --git a/debian/changelog b/debian/changelog index 740e7e0b5..bfcd6725a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,17 @@ +git-annex (5.20150732) UNRELEASED; urgency=medium + + * Perform a clean shutdown when --time-limit is reached. + This includes running queued git commands, and cleanup actions normally + run when a command is finished. + * fsck: Commit incremental fsck database when --time-limit is reached. + Previously, some of the last files fscked did not make it into the + database when using --time-limit. + * fsck: Commit incremental fsck database after every 1000 files + fscked, or every 5 minutes, whichever comes first. Previously, + commits were made every 1000 files fscked. + + -- Joey Hess <id@joeyh.name> Fri, 31 Jul 2015 12:31:39 -0400 + git-annex (5.20150731) unstable; urgency=medium * webapp: Support enabling known gitlab.com remotes. diff --git a/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_.mdwn b/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_.mdwn index 001284821..8cc989a0b 100644 --- a/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_.mdwn +++ b/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_.mdwn @@ -21,3 +21,14 @@ Also tried with `docker/ubuntu:latest` using a clean install from https://downlo git-annex: 5.20150522-gb199d65 Linux: 3.16.0-43-generic #58-Ubuntu SMP Fri Jun 19 11:04:02 UTC 2015 x86_64 x86_64 x86_64 GNU/Linux +> I've adjusted the timing of the fsck checkpoints used in resumes some. +> Now it makes one every 5 minutes, or every 1000 files, whichever comes +> first. I could make this tunable, but I don't think it's worth adding the +> complexity; this heuristic should work pretty well. +> +> Another approach would be to catch sigint and commit the fsck database +> then, as is now done when --time-limit interrupts a fsck run. +> But, I am leery of complicating git-annex with signal handling, +> so I've not done that currently. +> +> Also, documented this in fsck's man page. [[done]] --[[Joey]] diff --git a/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_/comment_3_4afd7d73952c7f9172c80f20a5047625._comment b/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_/comment_3_4afd7d73952c7f9172c80f20a5047625._comment new file mode 100644 index 000000000..014b1a431 --- /dev/null +++ b/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_/comment_3_4afd7d73952c7f9172c80f20a5047625._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2015-07-31T20:01:09Z" + content=""" +Yeah, very good point about --time-limit. I've gone ahead and made that +result in a fsck database save, so it will pick up right where it left off +when using --time-limit. +"""]] diff --git a/doc/devblog/day_307__two_release_week.mdwn b/doc/devblog/day_307__two_release_week.mdwn new file mode 100644 index 000000000..32202906f --- /dev/null +++ b/doc/devblog/day_307__two_release_week.mdwn @@ -0,0 +1,11 @@ +Made a release this morning, mostly because the release earlier this week +turns out to have accidentially removed several options from `git annex copy`. + +Spent some time this afternoon improving how git-annex shuts down when +--time-limit is used. This used to be a quick and dirty shutdown, similar +to if git-annex were ctrl-c'd, but I reworked things so it does a clean +shutdown, including running any buffered git commands. +This made incremental fsck with --time-limit resume much better, since +it saves the incremental fsck database on shutdown. Also tuned when the +database gets checkpointed during an incremental fsck, to resume better after +it's interrupted. diff --git a/doc/git-annex-fsck.mdwn b/doc/git-annex-fsck.mdwn index 73c401eb3..68c824c91 100644 --- a/doc/git-annex-fsck.mdwn +++ b/doc/git-annex-fsck.mdwn @@ -37,7 +37,12 @@ With parameters, only the specified files are checked. * `--more` - Continue the last incremental fsck pass, where it left off. + Resume the last incremental fsck pass, where it left off. + + Resuming may redundantly check some files that were checked + before. Any files that fsck found problems with before will be re-checked + on resume. Also, checkpoints are made every 1000 files or every 5 minutes + during a fsck, and it resumes from the last checkpoint. * `--incremental-schedule=time` |