diff options
author | Joey Hess <joey@kitenet.net> | 2013-03-10 21:36:13 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2013-03-10 22:10:26 -0400 |
commit | 9d5b42222a10050fc733a49c427359f88e8be36c (patch) | |
tree | 4fa0b1d6cf4a38ba772557dc4277fbd887020e46 | |
parent | e107ddb4507d92303e14ebee3ccdec7662350031 (diff) |
assistant: generate better commits for renames
-rw-r--r-- | Assistant/Changes.hs | 11 | ||||
-rw-r--r-- | Assistant/Commits.hs | 9 | ||||
-rw-r--r-- | Assistant/Threads/Committer.hs | 116 | ||||
-rw-r--r-- | Assistant/Types/Changes.hs | 3 | ||||
-rw-r--r-- | Assistant/Types/Commits.hs | 4 | ||||
-rw-r--r-- | Utility/TSet.hs | 28 | ||||
-rw-r--r-- | Utility/ThreadScheduler.hs | 12 | ||||
-rw-r--r-- | debian/changelog | 1 | ||||
-rw-r--r-- | doc/design/assistant/blog/day_210__spring.mdwn | 14 |
9 files changed, 130 insertions, 68 deletions
diff --git a/Assistant/Changes.hs b/Assistant/Changes.hs index 9114f5124..60372f316 100644 --- a/Assistant/Changes.hs +++ b/Assistant/Changes.hs @@ -12,6 +12,7 @@ import Assistant.Types.Changes import Utility.TSet import Data.Time.Clock +import Control.Concurrent.STM {- Handlers call this when they made a change that needs to get committed. -} madeChange :: FilePath -> ChangeInfo -> Assistant (Maybe Change) @@ -27,13 +28,17 @@ pendingAddChange f = Just <$> (PendingAddChange <$> liftIO getCurrentTime <*> pu {- Gets all unhandled changes. - Blocks until at least one change is made. -} getChanges :: Assistant [Change] -getChanges = getTSet <<~ changeChan +getChanges = (atomically . getTSet) <<~ changeChan + +{- Gets all unhandled changes, without blocking. -} +getAnyChanges :: Assistant [Change] +getAnyChanges = (atomically . readTSet) <<~ changeChan {- Puts unhandled changes back into the channel. - Note: Original order is not preserved. -} refillChanges :: [Change] -> Assistant () -refillChanges cs = flip putTSet cs <<~ changeChan +refillChanges cs = (atomically . flip putTSet cs) <<~ changeChan {- Records a change in the channel. -} recordChange :: Change -> Assistant () -recordChange c = flip putTSet1 c <<~ changeChan +recordChange c = (atomically . flip putTSet1 c) <<~ changeChan diff --git a/Assistant/Commits.hs b/Assistant/Commits.hs index 79555fee5..f4e908e0d 100644 --- a/Assistant/Commits.hs +++ b/Assistant/Commits.hs @@ -9,19 +9,20 @@ module Assistant.Commits where import Assistant.Common import Assistant.Types.Commits - import Utility.TSet +import Control.Concurrent.STM + {- Gets all unhandled commits. - Blocks until at least one commit is made. -} getCommits :: Assistant [Commit] -getCommits = getTSet <<~ commitChan +getCommits = (atomically . getTSet) <<~ commitChan {- Puts unhandled commits back into the channel. - Note: Original order is not preserved. -} refillCommits :: [Commit] -> Assistant () -refillCommits cs = flip putTSet cs <<~ commitChan +refillCommits cs = (atomically . flip putTSet cs) <<~ commitChan {- Records a commit in the channel. -} recordCommit :: Assistant () -recordCommit = flip putTSet1 Commit <<~ commitChan +recordCommit = (atomically . flip putTSet1 Commit) <<~ commitChan diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index 2450861e6..b336f650b 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -38,6 +38,7 @@ import Data.Time.Clock import Data.Tuple.Utils import qualified Data.Set as S import Data.Either +import Control.Concurrent {- This thread makes git commits at appropriate times. -} commitThread :: NamedThread @@ -45,34 +46,81 @@ commitThread = namedThread "Committer" $ do delayadd <- liftAnnex $ maybe delayaddDefault (return . Just . Seconds) =<< annexDelayAdd <$> Annex.getGitConfig - runEvery (Seconds 1) <~> do - -- We already waited one second as a simple rate limiter. - -- Next, wait until at least one change is available for - -- processing. - changes <- getChanges - -- Now see if now's a good time to commit. - time <- liftIO getCurrentTime - if shouldCommit time changes + waitChangeTime $ \(changes, time) -> do + readychanges <- handleAdds delayadd changes + if shouldCommit time readychanges then do - readychanges <- handleAdds delayadd changes - if shouldCommit time readychanges - then do - debug - [ "committing" - , show (length readychanges) - , "changes" - ] - void $ alertWhile commitAlert $ - liftAnnex commitStaged - recordCommit - mapM_ checkChangeContent readychanges - else refill readychanges - else refill changes + debug + [ "committing" + , show (length readychanges) + , "changes" + ] + void $ alertWhile commitAlert $ + liftAnnex commitStaged + recordCommit + mapM_ checkChangeContent readychanges + else refill readychanges + +refill :: [Change] -> Assistant () +refill [] = noop +refill cs = do + debug ["delaying commit of", show (length cs), "changes"] + refillChanges cs + +{- Wait for one or more changes to arrive to be committed. -} +waitChangeTime :: (([Change], UTCTime) -> Assistant ()) -> Assistant () +waitChangeTime a = runEvery (Seconds 1) <~> do + -- We already waited one second as a simple rate limiter. + -- Next, wait until at least one change is available for + -- processing. + changes <- getChanges + -- See if now's a good time to commit. + now <- liftIO getCurrentTime + case (shouldCommit now changes, lonelychange changes) of + (True, False) -> a (changes, now) + (True, True) -> do + -- Wait for other, related changes to arrive. + liftIO $ humanImperceptibleDelay + -- Don't block, but are there any? + morechanges <- getAnyChanges + let allchanges = changes++morechanges + a (allchanges, now) + _ -> refill changes + where + {- Did we perhaps only get one of the AddChange and RmChange pair + - that make up a rename? -} + lonelychange [(PendingAddChange _ _)] = True + lonelychange [(Change { changeInfo = i })] | i == RmChange = True + lonelychange _ = False + +{- An amount of time that is hopefully imperceptably short for humans, + - while long enough for a computer to get some work done. + - Note that 0.001 is a little too short for rename change batching to + - work. -} +humanImperceptibleInterval :: NominalDiffTime +humanImperceptibleInterval = 0.01 + +humanImperceptibleDelay :: IO () +humanImperceptibleDelay = threadDelay $ + truncate $ humanImperceptibleInterval * fromIntegral oneSecond + +{- Decide if now is a good time to make a commit. + - Note that the list of changes has an undefined order. + - + - Current strategy: If there have been 10 changes within the past second, + - a batch activity is taking place, so wait for later. + -} +shouldCommit :: UTCTime -> [Change] -> Bool +shouldCommit now changes + | len == 0 = False + | len > 10000 = True -- avoid bloating queue too much + | length recentchanges < 10 = True + | otherwise = False -- batch activity where - refill [] = noop - refill cs = do - debug ["delaying commit of", show (length cs), "changes"] - refillChanges cs + len = length changes + thissecond c = timeDelta c <= 1 + recentchanges = filter thissecond changes + timeDelta c = now `diffUTCTime` changeTime c commitStaged :: Annex Bool commitStaged = do @@ -105,22 +153,6 @@ commitStaged = do | otherwise = Param "--allow-empty-message" : Param "-m" : Param "" : ps -{- Decide if now is a good time to make a commit. - - Note that the list of changes has an undefined order. - - - - Current strategy: If there have been 10 changes within the past second, - - a batch activity is taking place, so wait for later. - -} -shouldCommit :: UTCTime -> [Change] -> Bool -shouldCommit now changes - | len == 0 = False - | len > 10000 = True -- avoid bloating queue too much - | length (filter thisSecond changes) < 10 = True - | otherwise = False -- batch activity - where - len = length changes - thisSecond c = now `diffUTCTime` changeTime c <= 1 - {- OSX needs a short delay after a file is added before locking it down, - when using a non-direct mode repository, as pasting a file seems to - try to set file permissions or otherwise access the file after closing diff --git a/Assistant/Types/Changes.hs b/Assistant/Types/Changes.hs index d4e1b28bc..ee797b8fe 100644 --- a/Assistant/Types/Changes.hs +++ b/Assistant/Types/Changes.hs @@ -12,6 +12,7 @@ import Types.Key import Utility.TSet import Data.Time.Clock +import Control.Concurrent.STM data ChangeInfo = AddChange Key | LinkChange (Maybe Key) | RmChange | RmDirChange deriving (Show, Eq) @@ -40,7 +41,7 @@ data Change deriving (Show) newChangeChan :: IO ChangeChan -newChangeChan = newTSet +newChangeChan = atomically newTSet isPendingAddChange :: Change -> Bool isPendingAddChange (PendingAddChange {}) = True diff --git a/Assistant/Types/Commits.hs b/Assistant/Types/Commits.hs index bb17c578b..12dab8da8 100644 --- a/Assistant/Types/Commits.hs +++ b/Assistant/Types/Commits.hs @@ -9,9 +9,11 @@ module Assistant.Types.Commits where import Utility.TSet +import Control.Concurrent.STM + type CommitChan = TSet Commit data Commit = Commit newCommitChan :: IO CommitChan -newCommitChan = newTSet +newCommitChan = atomically newTSet diff --git a/Utility/TSet.hs b/Utility/TSet.hs index bb711a4fb..c5ee22c89 100644 --- a/Utility/TSet.hs +++ b/Utility/TSet.hs @@ -1,6 +1,6 @@ {- Transactional sets - - - Copyright 2012 Joey Hess <joey@kitenet.net> + - Copyright 2012, 2013 Joey Hess <joey@kitenet.net> -} module Utility.TSet where @@ -11,18 +11,20 @@ import Control.Concurrent.STM type TSet = TChan -runTSet :: STM a -> IO a -runTSet = atomically - -newTSet :: IO (TSet a) -newTSet = atomically newTChan +newTSet :: STM (TSet a) +newTSet = newTChan {- Gets the contents of the TSet. Blocks until at least one item is - present. -} -getTSet :: TSet a -> IO [a] -getTSet tset = runTSet $ do +getTSet :: TSet a -> STM [a] +getTSet tset = do c <- readTChan tset - go [c] + l <- readTSet tset + return $ c:l + +{- Gets anything currently in the TSet, without blocking. -} +readTSet :: TSet a -> STM [a] +readTSet tset = go [] where go l = do v <- tryReadTChan tset @@ -31,9 +33,9 @@ getTSet tset = runTSet $ do Just c -> go (c:l) {- Puts items into a TSet. -} -putTSet :: TSet a -> [a] -> IO () -putTSet tset vs = runTSet $ mapM_ (writeTChan tset) vs +putTSet :: TSet a -> [a] -> STM () +putTSet tset vs = mapM_ (writeTChan tset) vs {- Put a single item into a TSet. -} -putTSet1 :: TSet a -> a -> IO () -putTSet1 tset v = void $ runTSet $ writeTChan tset v +putTSet1 :: TSet a -> a -> STM () +putTSet1 tset v = void $ writeTChan tset v diff --git a/Utility/ThreadScheduler.hs b/Utility/ThreadScheduler.hs index a32606cfd..25ccbf25e 100644 --- a/Utility/ThreadScheduler.hs +++ b/Utility/ThreadScheduler.hs @@ -1,6 +1,6 @@ {- thread scheduling - - - Copyright 2012 Joey Hess <joey@kitenet.net> + - Copyright 2012, 2013 Joey Hess <joey@kitenet.net> - Copyright 2011 Bas van Dijk & Roel van Dijk - - Licensed under the GNU GPL version 3 or higher. @@ -14,6 +14,7 @@ import Common import Control.Concurrent import System.Posix.Signals +import Data.Time.Clock #ifndef __ANDROID__ import System.Posix.Terminal #endif @@ -21,6 +22,8 @@ import System.Posix.Terminal newtype Seconds = Seconds { fromSeconds :: Int } deriving (Eq, Ord, Show) +type Microseconds = Integer + {- Runs an action repeatedly forever, sleeping at least the specified number - of seconds in between. -} runEvery :: Seconds -> IO a -> IO a @@ -30,8 +33,6 @@ runEvery n a = forever $ do threadDelaySeconds :: Seconds -> IO () threadDelaySeconds (Seconds n) = unboundDelay (fromIntegral n * oneSecond) - where - oneSecond = 1000000 -- microseconds {- Like threadDelay, but not bounded by an Int. - @@ -42,7 +43,7 @@ threadDelaySeconds (Seconds n) = unboundDelay (fromIntegral n * oneSecond) - Taken from the unbounded-delay package to avoid a dependency for 4 lines - of code. -} -unboundDelay :: Integer -> IO () +unboundDelay :: Microseconds -> IO () unboundDelay time = do let maxWait = min time $ toInteger (maxBound :: Int) threadDelay $ fromInteger maxWait @@ -61,3 +62,6 @@ waitForTermination = do where check sig lock = void $ installHandler sig (CatchOnce $ putMVar lock ()) Nothing + +oneSecond :: Microseconds +oneSecond = 1000000 diff --git a/debian/changelog b/debian/changelog index 31cc06000..d02405938 100644 --- a/debian/changelog +++ b/debian/changelog @@ -42,6 +42,7 @@ git-annex (4.20130228) UNRELEASED; urgency=low (See https://github.com/yesodweb/wai/issues/146) * bugfix: drop --from an unavailable remote no longer updates the location log, incorrectly, to say the remote does not have the key. + * assistant: Generate better commits for renames. -- Joey Hess <joeyh@debian.org> Wed, 27 Feb 2013 23:20:40 -0400 diff --git a/doc/design/assistant/blog/day_210__spring.mdwn b/doc/design/assistant/blog/day_210__spring.mdwn index 061110b08..ff34cfbfe 100644 --- a/doc/design/assistant/blog/day_210__spring.mdwn +++ b/doc/design/assistant/blog/day_210__spring.mdwn @@ -13,3 +13,17 @@ Fixed some problems around USB drives. One was a real jaw-dropping bug: "git annex drop --from usbdrive" when the drive was not connected still updated the location log to indicate it did not have the file anymore! (Thank goodness for fsck..) + +I've noticed that moving around files in direct mode repos is inneficient, +because the assistant re-checksums the "new" file. One way to avoid +that would be to have a lookup table from (inode, size, mtime) to +key, but I don't have one, and would like to avoid adding one. + +Instead, I have a cunning plan to deal with this heuristically. If the +assistant can notice a file was removed and another file added at the same +time, it can compare the (inode, size, mtime) to see if it's a rename, and +avoid the checksum overhead. + +The first step to getting there was to make the assistant better at +batching together delete+add events into a single rename commit. I'm happy +to say I've accomplished that, with no perceptable delay to commits. |