From 216bb57e1060dbf82c3c1ead19a7264b048372cf Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 16 Dec 2013 15:43:28 -0400 Subject: assistant: Always batch changes found in startup scan. Batch detection is heuristic, so can sometimes fail. I observed one such failure while starting up in a repository with 87000 files. After the first several batches of ~5000 files, it fell out of batch mode, and never re-entered it, and so made many more commits of a few files at a time than necessary. So, let's always use batch mode when in the startup scan. This avoids the heuristic there, at least. There is clearly also room to improve the heuristic. Possibly 10 files is too high a bar to be found during a commit, on a system that can commit quickly. --- Assistant/Threads/Committer.hs | 10 ++++++---- Assistant/Threads/Watcher.hs | 5 +++++ debian/changelog | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index f736530e4..bb9b0e47e 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -52,7 +52,7 @@ commitThread = namedThread "Committer" $ do =<< annexDelayAdd <$> Annex.getGitConfig waitChangeTime $ \(changes, time) -> do readychanges <- handleAdds havelsof delayadd changes - if shouldCommit time (length readychanges) readychanges + if shouldCommit False time (length readychanges) readychanges then do debug [ "committing" @@ -94,7 +94,8 @@ waitChangeTime a = waitchanges 0 let len = length changes -- See if now's a good time to commit. now <- liftIO getCurrentTime - case (lastcommitsize >= maxCommitSize, shouldCommit now len changes, possiblyrename changes) of + scanning <- not . scanComplete <$> getDaemonStatus + case (lastcommitsize >= maxCommitSize, shouldCommit scanning now len changes, possiblyrename changes) of (True, True, _) | len > maxCommitSize -> waitchanges =<< a (changes, now) @@ -199,8 +200,9 @@ maxCommitSize = 5000 - Current strategy: If there have been 10 changes within the past second, - a batch activity is taking place, so wait for later. -} -shouldCommit :: UTCTime -> Int -> [Change] -> Bool -shouldCommit now len changes +shouldCommit :: Bool -> UTCTime -> Int -> [Change] -> Bool +shouldCommit scanning now len changes + | scanning = len >= maxCommitSize | len == 0 = False | len >= maxCommitSize = True | length recentchanges < 10 = True diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs index d9afb9adf..50a0efdd5 100644 --- a/Assistant/Threads/Watcher.hs +++ b/Assistant/Threads/Watcher.hs @@ -144,6 +144,11 @@ startupScan scanner = do modifyDaemonStatus_ $ \s -> s { scanComplete = True } + -- Ensure that the Committer sees any changes + -- that it did not process, and acts on them now that + -- the scan is complete. + refillChanges =<< getAnyChanges + return (True, r) {- Hardcoded ignores, passed to the DirWatcher so it can avoid looking diff --git a/debian/changelog b/debian/changelog index 8b8968c4d..628307671 100644 --- a/debian/changelog +++ b/debian/changelog @@ -10,6 +10,7 @@ git-annex (5.20131214) UNRELEASED; urgency=low * Include man pages in Linux and OSX standalone builds. * Linux standalone build now includes its own glibc and forces the linker to use it, to remove dependence on the host glibc. + * assistant: Always batch changes found in startup scan. -- Joey Hess Sun, 15 Dec 2013 13:32:49 -0400 -- cgit v1.2.3