summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Annex/Action.hs35
-rw-r--r--CmdLine.hs25
-rw-r--r--Command/Fsck.hs52
-rw-r--r--Command/RecvKey.hs2
-rw-r--r--Database/Fsck.hs10
-rw-r--r--Database/Handle.hs46
-rw-r--r--Limit.hs2
-rw-r--r--Remote/Rsync.hs2
-rw-r--r--debian/changelog14
-rw-r--r--doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_.mdwn11
-rw-r--r--doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_/comment_3_4afd7d73952c7f9172c80f20a5047625._comment9
-rw-r--r--doc/devblog/day_307__two_release_week.mdwn11
-rw-r--r--doc/git-annex-fsck.mdwn7
13 files changed, 158 insertions, 68 deletions
diff --git a/Annex/Action.hs b/Annex/Action.hs
new file mode 100644
index 000000000..f59c9c2f4
--- /dev/null
+++ b/Annex/Action.hs
@@ -0,0 +1,35 @@
+{- git-annex actions
+ -
+ - Copyright 2010-2015 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE CPP #-}
+
+module Annex.Action where
+
+import qualified Data.Map as M
+#ifndef mingw32_HOST_OS
+import System.Posix.Signals
+#endif
+
+import Common.Annex
+import qualified Annex
+import Annex.Content
+
+{- Actions to perform each time ran. -}
+startup :: Annex ()
+startup =
+#ifndef mingw32_HOST_OS
+ liftIO $ void $ installHandler sigINT Default Nothing
+#else
+ return ()
+#endif
+
+{- Cleanup actions. -}
+shutdown :: Bool -> Annex ()
+shutdown nocommit = do
+ saveState nocommit
+ sequence_ =<< M.elems <$> Annex.getState Annex.cleanup
+ liftIO reapZombies -- zombies from long-running git processes
diff --git a/CmdLine.hs b/CmdLine.hs
index 492a3b75f..a512d868d 100644
--- a/CmdLine.hs
+++ b/CmdLine.hs
@@ -5,29 +5,22 @@
- Licensed under the GNU GPL version 3 or higher.
-}
-{-# LANGUAGE CPP #-}
-
module CmdLine (
dispatch,
usage,
- shutdown
) where
import qualified Options.Applicative as O
import qualified Options.Applicative.Help as H
import qualified Control.Exception as E
-import qualified Data.Map as M
import Control.Exception (throw)
-#ifndef mingw32_HOST_OS
-import System.Posix.Signals
-#endif
import Common.Annex
import qualified Annex
import qualified Git
import qualified Git.AutoCorrect
import qualified Git.Config
-import Annex.Content
+import Annex.Action
import Annex.Environment
import Command
import Types.Messages
@@ -117,19 +110,3 @@ findCmd fuzzyok argv cmds
inexactcmds = case name of
Nothing -> []
Just n -> Git.AutoCorrect.fuzzymatches n cmdname cmds
-
-{- Actions to perform each time ran. -}
-startup :: Annex ()
-startup =
-#ifndef mingw32_HOST_OS
- liftIO $ void $ installHandler sigINT Default Nothing
-#else
- return ()
-#endif
-
-{- Cleanup actions. -}
-shutdown :: Bool -> Annex ()
-shutdown nocommit = do
- saveState nocommit
- sequence_ =<< M.elems <$> Annex.getState Annex.cleanup
- liftIO reapZombies -- zombies from long-running git processes
diff --git a/Command/Fsck.hs b/Command/Fsck.hs
index a201c4519..c3b73edb5 100644
--- a/Command/Fsck.hs
+++ b/Command/Fsck.hs
@@ -457,18 +457,12 @@ runFsck inc file key a = ifM (needFsck inc key)
{- Check if a key needs to be fscked, with support for incremental fscks. -}
needFsck :: Incremental -> Key -> Annex Bool
+needFsck (ScheduleIncremental _ _ i) k = needFsck i k
#ifdef WITH_DATABASE
needFsck (ContIncremental h) key = liftIO $ not <$> FsckDb.inDb h key
#endif
needFsck _ _ = return True
-#ifdef WITH_DATABASE
-withFsckDb :: Incremental -> (FsckDb.FsckHandle -> Annex ()) -> Annex ()
-withFsckDb (ContIncremental h) a = a h
-withFsckDb (StartIncremental h) a = a h
-withFsckDb NonIncremental _ = noop
-#endif
-
recordFsckTime :: Incremental -> Key -> Annex ()
#ifdef WITH_DATABASE
recordFsckTime inc key = withFsckDb inc $ \h -> liftIO $ FsckDb.addDb h key
@@ -527,7 +521,8 @@ data Incremental
= NonIncremental
#ifdef WITH_DATABASE
| StartIncremental FsckDb.FsckHandle
- | ContIncremental FsckDb.FsckHandle
+ | ContIncremental FsckDb.FsckHandle
+ | ScheduleIncremental Duration UUID Incremental
#endif
prepIncremental :: UUID -> Maybe IncrementalOpt -> Annex Incremental
@@ -536,31 +531,44 @@ prepIncremental _ Nothing = pure NonIncremental
prepIncremental u (Just StartIncrementalO) = do
recordStartTime u
ifM (FsckDb.newPass u)
- ( StartIncremental <$> FsckDb.openDb u
+ ( StartIncremental <$> openFsckDb u
, error "Cannot start a new --incremental fsck pass; another fsck process is already running."
)
prepIncremental u (Just MoreIncrementalO) =
- ContIncremental <$> FsckDb.openDb u
+ ContIncremental <$> openFsckDb u
prepIncremental u (Just (ScheduleIncrementalO delta)) = do
- Annex.addCleanup FsckCleanup $ do
- v <- getStartTime u
- case v of
- Nothing -> noop
- Just started -> do
- now <- liftIO getPOSIXTime
- when (now - realToFrac started >= durationToPOSIXTime delta) $
- resetStartTime u
started <- getStartTime u
- prepIncremental u $ Just $ case started of
+ i <- prepIncremental u $ Just $ case started of
Nothing -> StartIncrementalO
Just _ -> MoreIncrementalO
+ return (ScheduleIncremental delta u i)
#else
prepIncremental _ _ = error "This git-annex was not built with database support; incremental fsck not supported"
#endif
cleanupIncremental :: Incremental -> Annex ()
-#ifdef WITH_DATABASE
-cleanupIncremental i = withFsckDb i FsckDb.closeDb
-#else
+cleanupIncremental (ScheduleIncremental delta u i) = do
+ v <- getStartTime u
+ case v of
+ Nothing -> noop
+ Just started -> do
+ now <- liftIO getPOSIXTime
+ when (now - realToFrac started >= durationToPOSIXTime delta) $
+ resetStartTime u
+ cleanupIncremental i
cleanupIncremental _ = return ()
+
+#ifdef WITH_DATABASE
+openFsckDb :: UUID -> Annex FsckDb.FsckHandle
+openFsckDb u = do
+ h <- FsckDb.openDb u
+ Annex.addCleanup FsckCleanup $
+ FsckDb.closeDb h
+ return h
+
+withFsckDb :: Incremental -> (FsckDb.FsckHandle -> Annex ()) -> Annex ()
+withFsckDb (ContIncremental h) a = a h
+withFsckDb (StartIncremental h) a = a h
+withFsckDb NonIncremental _ = noop
+withFsckDb (ScheduleIncremental _ _ i) a = withFsckDb i a
#endif
diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs
index a49efce2f..cb24dbb47 100644
--- a/Command/RecvKey.hs
+++ b/Command/RecvKey.hs
@@ -9,8 +9,8 @@ module Command.RecvKey where
import Common.Annex
import Command
-import CmdLine
import Annex.Content
+import Annex.Action
import Annex
import Utility.Rsync
import Logs.Transfer
diff --git a/Database/Fsck.hs b/Database/Fsck.hs
index d9416927b..20b4878e3 100644
--- a/Database/Fsck.hs
+++ b/Database/Fsck.hs
@@ -34,6 +34,7 @@ import Annex.LockFile
import Database.Persist.TH
import Database.Esqueleto hiding (Key)
+import Data.Time.Clock
data FsckHandle = FsckHandle H.DbHandle UUID
@@ -84,11 +85,18 @@ closeDb (FsckHandle h u) = do
unlockFile =<< fromRepo (gitAnnexFsckDbLock u)
addDb :: FsckHandle -> Key -> IO ()
-addDb (FsckHandle h _) k = H.queueDb h 1000 $
+addDb (FsckHandle h _) k = H.queueDb h checkcommit $
void $ insertUnique $ Fscked sk
where
sk = toSKey k
+ -- commit queue after 1000 files or 5 minutes, whichever comes first
+ checkcommit sz lastcommittime
+ | sz > 1000 = return True
+ | otherwise = do
+ now <- getCurrentTime
+ return $ diffUTCTime lastcommittime now > 300
+
inDb :: FsckHandle -> Key -> IO Bool
inDb (FsckHandle h _) = H.queryDb h . inDb' . toSKey
diff --git a/Database/Handle.hs b/Database/Handle.hs
index dc3363e48..1fd9f7834 100644
--- a/Database/Handle.hs
+++ b/Database/Handle.hs
@@ -20,6 +20,7 @@ module Database.Handle (
) where
import Utility.Exception
+import Utility.Monad
import Messages
import Database.Persist.Sqlite
@@ -33,6 +34,7 @@ import qualified Data.Text as T
import Control.Monad.Trans.Resource (runResourceT)
import Control.Monad.Logger (runNoLoggingT)
import Data.List
+import Data.Time.Clock
{- A DbHandle is a reference to a worker thread that communicates with
- the database. It has a MVar which Jobs are submitted to. -}
@@ -64,7 +66,7 @@ openDb :: FilePath -> TableName -> IO DbHandle
openDb db tablename = do
jobs <- newEmptyMVar
worker <- async (workerThread (T.pack db) tablename jobs)
- q <- newMVar emptyDbQueue
+ q <- newMVar =<< emptyDbQueue
return $ DbHandle worker jobs q
data Job
@@ -145,16 +147,19 @@ closeDb h@(DbHandle worker jobs _) = do
type Size = Int
+type LastCommitTime = UTCTime
+
{- A queue of actions to perform, with a count of the number of actions
- - queued. -}
-data DbQueue = DbQueue Size (SqlPersistM ())
+ - queued, and a last commit time. -}
+data DbQueue = DbQueue Size LastCommitTime (SqlPersistM ())
-emptyDbQueue :: DbQueue
-emptyDbQueue = DbQueue 0 (return ())
+emptyDbQueue :: IO DbQueue
+emptyDbQueue = do
+ now <- getCurrentTime
+ return $ DbQueue 0 now (return ())
{- Queues a change to be made to the database. It will be buffered
- - to be committed later, unless the queue gets larger than the specified
- - size.
+ - to be committed later, unless the commitchecker action returns true.
-
- (Be sure to call closeDb or flushQueueDb to ensure the change
- gets committed.)
@@ -164,25 +169,32 @@ emptyDbQueue = DbQueue 0 (return ())
- process, the transaction is put back in the queue. This solves
- the sqlite multiple writer problem.
-}
-queueDb :: DbHandle -> Size -> SqlPersistM () -> IO ()
-queueDb h@(DbHandle _ _ qvar) maxsz a = do
- DbQueue sz qa <- takeMVar qvar
+queueDb
+ :: DbHandle
+ -> (Size -> LastCommitTime -> IO Bool)
+ -> SqlPersistM ()
+ -> IO ()
+queueDb h@(DbHandle _ _ qvar) commitchecker a = do
+ DbQueue sz lastcommittime qa <- takeMVar qvar
let !sz' = sz + 1
let qa' = qa >> a
- let enqueue newsz = putMVar qvar (DbQueue newsz qa')
- if sz' > maxsz
- then do
+ let enqueue = putMVar qvar
+ ifM (commitchecker sz' lastcommittime)
+ ( do
r <- commitDb h qa'
case r of
- Left _ -> enqueue 0
- Right _ -> putMVar qvar emptyDbQueue
- else enqueue sz'
+ Left _ -> enqueue $ DbQueue sz' lastcommittime qa'
+ Right _ -> do
+ now <- getCurrentTime
+ enqueue $ DbQueue 0 now (return ())
+ , enqueue $ DbQueue sz' lastcommittime qa'
+ )
{- If flushing the queue fails, this could be because there is another
- writer to the database. Retry repeatedly for up to 10 seconds. -}
flushQueueDb :: DbHandle -> IO ()
flushQueueDb h@(DbHandle _ _ qvar) = do
- DbQueue sz qa <- takeMVar qvar
+ DbQueue sz _ qa <- takeMVar qvar
when (sz > 0) $
robustly Nothing 100 (commitDb h qa)
where
diff --git a/Limit.hs b/Limit.hs
index da9b6a155..6930ab06d 100644
--- a/Limit.hs
+++ b/Limit.hs
@@ -13,6 +13,7 @@ import qualified Utility.Matcher
import qualified Remote
import qualified Backend
import Annex.Content
+import Annex.Action
import Annex.UUID
import Logs.Trust
import Annex.NumCopies
@@ -271,6 +272,7 @@ addTimeLimit s = do
if now > cutoff
then do
warning $ "Time limit (" ++ s ++ ") reached!"
+ shutdown True
liftIO $ exitWith $ ExitFailure 101
else return True
diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs
index b9cdc458e..c610938a9 100644
--- a/Remote/Rsync.hs
+++ b/Remote/Rsync.hs
@@ -33,8 +33,6 @@ import Utility.Rsync
import Utility.CopyFile
import Messages.Progress
import Utility.Metered
-import Utility.PID
-import Annex.Perms
import Logs.Transfer
import Types.Creds
import Types.Key (isChunkKey)
diff --git a/debian/changelog b/debian/changelog
index 740e7e0b5..bfcd6725a 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,17 @@
+git-annex (5.20150732) UNRELEASED; urgency=medium
+
+ * Perform a clean shutdown when --time-limit is reached.
+ This includes running queued git commands, and cleanup actions normally
+ run when a command is finished.
+ * fsck: Commit incremental fsck database when --time-limit is reached.
+ Previously, some of the last files fscked did not make it into the
+ database when using --time-limit.
+ * fsck: Commit incremental fsck database after every 1000 files
+ fscked, or every 5 minutes, whichever comes first. Previously,
+ commits were made every 1000 files fscked.
+
+ -- Joey Hess <id@joeyh.name> Fri, 31 Jul 2015 12:31:39 -0400
+
git-annex (5.20150731) unstable; urgency=medium
* webapp: Support enabling known gitlab.com remotes.
diff --git a/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_.mdwn b/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_.mdwn
index 001284821..8cc989a0b 100644
--- a/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_.mdwn
+++ b/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_.mdwn
@@ -21,3 +21,14 @@ Also tried with `docker/ubuntu:latest` using a clean install from https://downlo
git-annex: 5.20150522-gb199d65
Linux: 3.16.0-43-generic #58-Ubuntu SMP Fri Jun 19 11:04:02 UTC 2015 x86_64 x86_64 x86_64 GNU/Linux
+> I've adjusted the timing of the fsck checkpoints used in resumes some.
+> Now it makes one every 5 minutes, or every 1000 files, whichever comes
+> first. I could make this tunable, but I don't think it's worth adding the
+> complexity; this heuristic should work pretty well.
+>
+> Another approach would be to catch sigint and commit the fsck database
+> then, as is now done when --time-limit interrupts a fsck run.
+> But, I am leery of complicating git-annex with signal handling,
+> so I've not done that currently.
+>
+> Also, documented this in fsck's man page. [[done]] --[[Joey]]
diff --git a/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_/comment_3_4afd7d73952c7f9172c80f20a5047625._comment b/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_/comment_3_4afd7d73952c7f9172c80f20a5047625._comment
new file mode 100644
index 000000000..014b1a431
--- /dev/null
+++ b/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_/comment_3_4afd7d73952c7f9172c80f20a5047625._comment
@@ -0,0 +1,9 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 3"""
+ date="2015-07-31T20:01:09Z"
+ content="""
+Yeah, very good point about --time-limit. I've gone ahead and made that
+result in a fsck database save, so it will pick up right where it left off
+when using --time-limit.
+"""]]
diff --git a/doc/devblog/day_307__two_release_week.mdwn b/doc/devblog/day_307__two_release_week.mdwn
new file mode 100644
index 000000000..32202906f
--- /dev/null
+++ b/doc/devblog/day_307__two_release_week.mdwn
@@ -0,0 +1,11 @@
+Made a release this morning, mostly because the release earlier this week
+turns out to have accidentially removed several options from `git annex copy`.
+
+Spent some time this afternoon improving how git-annex shuts down when
+--time-limit is used. This used to be a quick and dirty shutdown, similar
+to if git-annex were ctrl-c'd, but I reworked things so it does a clean
+shutdown, including running any buffered git commands.
+This made incremental fsck with --time-limit resume much better, since
+it saves the incremental fsck database on shutdown. Also tuned when the
+database gets checkpointed during an incremental fsck, to resume better after
+it's interrupted.
diff --git a/doc/git-annex-fsck.mdwn b/doc/git-annex-fsck.mdwn
index 73c401eb3..68c824c91 100644
--- a/doc/git-annex-fsck.mdwn
+++ b/doc/git-annex-fsck.mdwn
@@ -37,7 +37,12 @@ With parameters, only the specified files are checked.
* `--more`
- Continue the last incremental fsck pass, where it left off.
+ Resume the last incremental fsck pass, where it left off.
+
+ Resuming may redundantly check some files that were checked
+ before. Any files that fsck found problems with before will be re-checked
+ on resume. Also, checkpoints are made every 1000 files or every 5 minutes
+ during a fsck, and it resumes from the last checkpoint.
* `--incremental-schedule=time`