summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2013-03-10 21:36:13 -0400
committerGravatar Joey Hess <joey@kitenet.net>2013-03-10 22:10:26 -0400
commit9d5b42222a10050fc733a49c427359f88e8be36c (patch)
tree4fa0b1d6cf4a38ba772557dc4277fbd887020e46
parente107ddb4507d92303e14ebee3ccdec7662350031 (diff)
assistant: generate better commits for renames
-rw-r--r--Assistant/Changes.hs11
-rw-r--r--Assistant/Commits.hs9
-rw-r--r--Assistant/Threads/Committer.hs116
-rw-r--r--Assistant/Types/Changes.hs3
-rw-r--r--Assistant/Types/Commits.hs4
-rw-r--r--Utility/TSet.hs28
-rw-r--r--Utility/ThreadScheduler.hs12
-rw-r--r--debian/changelog1
-rw-r--r--doc/design/assistant/blog/day_210__spring.mdwn14
9 files changed, 130 insertions, 68 deletions
diff --git a/Assistant/Changes.hs b/Assistant/Changes.hs
index 9114f5124..60372f316 100644
--- a/Assistant/Changes.hs
+++ b/Assistant/Changes.hs
@@ -12,6 +12,7 @@ import Assistant.Types.Changes
import Utility.TSet
import Data.Time.Clock
+import Control.Concurrent.STM
{- Handlers call this when they made a change that needs to get committed. -}
madeChange :: FilePath -> ChangeInfo -> Assistant (Maybe Change)
@@ -27,13 +28,17 @@ pendingAddChange f = Just <$> (PendingAddChange <$> liftIO getCurrentTime <*> pu
{- Gets all unhandled changes.
- Blocks until at least one change is made. -}
getChanges :: Assistant [Change]
-getChanges = getTSet <<~ changeChan
+getChanges = (atomically . getTSet) <<~ changeChan
+
+{- Gets all unhandled changes, without blocking. -}
+getAnyChanges :: Assistant [Change]
+getAnyChanges = (atomically . readTSet) <<~ changeChan
{- Puts unhandled changes back into the channel.
- Note: Original order is not preserved. -}
refillChanges :: [Change] -> Assistant ()
-refillChanges cs = flip putTSet cs <<~ changeChan
+refillChanges cs = (atomically . flip putTSet cs) <<~ changeChan
{- Records a change in the channel. -}
recordChange :: Change -> Assistant ()
-recordChange c = flip putTSet1 c <<~ changeChan
+recordChange c = (atomically . flip putTSet1 c) <<~ changeChan
diff --git a/Assistant/Commits.hs b/Assistant/Commits.hs
index 79555fee5..f4e908e0d 100644
--- a/Assistant/Commits.hs
+++ b/Assistant/Commits.hs
@@ -9,19 +9,20 @@ module Assistant.Commits where
import Assistant.Common
import Assistant.Types.Commits
-
import Utility.TSet
+import Control.Concurrent.STM
+
{- Gets all unhandled commits.
- Blocks until at least one commit is made. -}
getCommits :: Assistant [Commit]
-getCommits = getTSet <<~ commitChan
+getCommits = (atomically . getTSet) <<~ commitChan
{- Puts unhandled commits back into the channel.
- Note: Original order is not preserved. -}
refillCommits :: [Commit] -> Assistant ()
-refillCommits cs = flip putTSet cs <<~ commitChan
+refillCommits cs = (atomically . flip putTSet cs) <<~ commitChan
{- Records a commit in the channel. -}
recordCommit :: Assistant ()
-recordCommit = flip putTSet1 Commit <<~ commitChan
+recordCommit = (atomically . flip putTSet1 Commit) <<~ commitChan
diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs
index 2450861e6..b336f650b 100644
--- a/Assistant/Threads/Committer.hs
+++ b/Assistant/Threads/Committer.hs
@@ -38,6 +38,7 @@ import Data.Time.Clock
import Data.Tuple.Utils
import qualified Data.Set as S
import Data.Either
+import Control.Concurrent
{- This thread makes git commits at appropriate times. -}
commitThread :: NamedThread
@@ -45,34 +46,81 @@ commitThread = namedThread "Committer" $ do
delayadd <- liftAnnex $
maybe delayaddDefault (return . Just . Seconds)
=<< annexDelayAdd <$> Annex.getGitConfig
- runEvery (Seconds 1) <~> do
- -- We already waited one second as a simple rate limiter.
- -- Next, wait until at least one change is available for
- -- processing.
- changes <- getChanges
- -- Now see if now's a good time to commit.
- time <- liftIO getCurrentTime
- if shouldCommit time changes
+ waitChangeTime $ \(changes, time) -> do
+ readychanges <- handleAdds delayadd changes
+ if shouldCommit time readychanges
then do
- readychanges <- handleAdds delayadd changes
- if shouldCommit time readychanges
- then do
- debug
- [ "committing"
- , show (length readychanges)
- , "changes"
- ]
- void $ alertWhile commitAlert $
- liftAnnex commitStaged
- recordCommit
- mapM_ checkChangeContent readychanges
- else refill readychanges
- else refill changes
+ debug
+ [ "committing"
+ , show (length readychanges)
+ , "changes"
+ ]
+ void $ alertWhile commitAlert $
+ liftAnnex commitStaged
+ recordCommit
+ mapM_ checkChangeContent readychanges
+ else refill readychanges
+
+refill :: [Change] -> Assistant ()
+refill [] = noop
+refill cs = do
+ debug ["delaying commit of", show (length cs), "changes"]
+ refillChanges cs
+
+{- Wait for one or more changes to arrive to be committed. -}
+waitChangeTime :: (([Change], UTCTime) -> Assistant ()) -> Assistant ()
+waitChangeTime a = runEvery (Seconds 1) <~> do
+ -- We already waited one second as a simple rate limiter.
+ -- Next, wait until at least one change is available for
+ -- processing.
+ changes <- getChanges
+ -- See if now's a good time to commit.
+ now <- liftIO getCurrentTime
+ case (shouldCommit now changes, lonelychange changes) of
+ (True, False) -> a (changes, now)
+ (True, True) -> do
+ -- Wait for other, related changes to arrive.
+ liftIO $ humanImperceptibleDelay
+ -- Don't block, but are there any?
+ morechanges <- getAnyChanges
+ let allchanges = changes++morechanges
+ a (allchanges, now)
+ _ -> refill changes
+ where
+ {- Did we perhaps only get one of the AddChange and RmChange pair
+ - that make up a rename? -}
+ lonelychange [(PendingAddChange _ _)] = True
+ lonelychange [(Change { changeInfo = i })] | i == RmChange = True
+ lonelychange _ = False
+
+{- An amount of time that is hopefully imperceptably short for humans,
+ - while long enough for a computer to get some work done.
+ - Note that 0.001 is a little too short for rename change batching to
+ - work. -}
+humanImperceptibleInterval :: NominalDiffTime
+humanImperceptibleInterval = 0.01
+
+humanImperceptibleDelay :: IO ()
+humanImperceptibleDelay = threadDelay $
+ truncate $ humanImperceptibleInterval * fromIntegral oneSecond
+
+{- Decide if now is a good time to make a commit.
+ - Note that the list of changes has an undefined order.
+ -
+ - Current strategy: If there have been 10 changes within the past second,
+ - a batch activity is taking place, so wait for later.
+ -}
+shouldCommit :: UTCTime -> [Change] -> Bool
+shouldCommit now changes
+ | len == 0 = False
+ | len > 10000 = True -- avoid bloating queue too much
+ | length recentchanges < 10 = True
+ | otherwise = False -- batch activity
where
- refill [] = noop
- refill cs = do
- debug ["delaying commit of", show (length cs), "changes"]
- refillChanges cs
+ len = length changes
+ thissecond c = timeDelta c <= 1
+ recentchanges = filter thissecond changes
+ timeDelta c = now `diffUTCTime` changeTime c
commitStaged :: Annex Bool
commitStaged = do
@@ -105,22 +153,6 @@ commitStaged = do
| otherwise = Param "--allow-empty-message"
: Param "-m" : Param "" : ps
-{- Decide if now is a good time to make a commit.
- - Note that the list of changes has an undefined order.
- -
- - Current strategy: If there have been 10 changes within the past second,
- - a batch activity is taking place, so wait for later.
- -}
-shouldCommit :: UTCTime -> [Change] -> Bool
-shouldCommit now changes
- | len == 0 = False
- | len > 10000 = True -- avoid bloating queue too much
- | length (filter thisSecond changes) < 10 = True
- | otherwise = False -- batch activity
- where
- len = length changes
- thisSecond c = now `diffUTCTime` changeTime c <= 1
-
{- OSX needs a short delay after a file is added before locking it down,
- when using a non-direct mode repository, as pasting a file seems to
- try to set file permissions or otherwise access the file after closing
diff --git a/Assistant/Types/Changes.hs b/Assistant/Types/Changes.hs
index d4e1b28bc..ee797b8fe 100644
--- a/Assistant/Types/Changes.hs
+++ b/Assistant/Types/Changes.hs
@@ -12,6 +12,7 @@ import Types.Key
import Utility.TSet
import Data.Time.Clock
+import Control.Concurrent.STM
data ChangeInfo = AddChange Key | LinkChange (Maybe Key) | RmChange | RmDirChange
deriving (Show, Eq)
@@ -40,7 +41,7 @@ data Change
deriving (Show)
newChangeChan :: IO ChangeChan
-newChangeChan = newTSet
+newChangeChan = atomically newTSet
isPendingAddChange :: Change -> Bool
isPendingAddChange (PendingAddChange {}) = True
diff --git a/Assistant/Types/Commits.hs b/Assistant/Types/Commits.hs
index bb17c578b..12dab8da8 100644
--- a/Assistant/Types/Commits.hs
+++ b/Assistant/Types/Commits.hs
@@ -9,9 +9,11 @@ module Assistant.Types.Commits where
import Utility.TSet
+import Control.Concurrent.STM
+
type CommitChan = TSet Commit
data Commit = Commit
newCommitChan :: IO CommitChan
-newCommitChan = newTSet
+newCommitChan = atomically newTSet
diff --git a/Utility/TSet.hs b/Utility/TSet.hs
index bb711a4fb..c5ee22c89 100644
--- a/Utility/TSet.hs
+++ b/Utility/TSet.hs
@@ -1,6 +1,6 @@
{- Transactional sets
-
- - Copyright 2012 Joey Hess <joey@kitenet.net>
+ - Copyright 2012, 2013 Joey Hess <joey@kitenet.net>
-}
module Utility.TSet where
@@ -11,18 +11,20 @@ import Control.Concurrent.STM
type TSet = TChan
-runTSet :: STM a -> IO a
-runTSet = atomically
-
-newTSet :: IO (TSet a)
-newTSet = atomically newTChan
+newTSet :: STM (TSet a)
+newTSet = newTChan
{- Gets the contents of the TSet. Blocks until at least one item is
- present. -}
-getTSet :: TSet a -> IO [a]
-getTSet tset = runTSet $ do
+getTSet :: TSet a -> STM [a]
+getTSet tset = do
c <- readTChan tset
- go [c]
+ l <- readTSet tset
+ return $ c:l
+
+{- Gets anything currently in the TSet, without blocking. -}
+readTSet :: TSet a -> STM [a]
+readTSet tset = go []
where
go l = do
v <- tryReadTChan tset
@@ -31,9 +33,9 @@ getTSet tset = runTSet $ do
Just c -> go (c:l)
{- Puts items into a TSet. -}
-putTSet :: TSet a -> [a] -> IO ()
-putTSet tset vs = runTSet $ mapM_ (writeTChan tset) vs
+putTSet :: TSet a -> [a] -> STM ()
+putTSet tset vs = mapM_ (writeTChan tset) vs
{- Put a single item into a TSet. -}
-putTSet1 :: TSet a -> a -> IO ()
-putTSet1 tset v = void $ runTSet $ writeTChan tset v
+putTSet1 :: TSet a -> a -> STM ()
+putTSet1 tset v = void $ writeTChan tset v
diff --git a/Utility/ThreadScheduler.hs b/Utility/ThreadScheduler.hs
index a32606cfd..25ccbf25e 100644
--- a/Utility/ThreadScheduler.hs
+++ b/Utility/ThreadScheduler.hs
@@ -1,6 +1,6 @@
{- thread scheduling
-
- - Copyright 2012 Joey Hess <joey@kitenet.net>
+ - Copyright 2012, 2013 Joey Hess <joey@kitenet.net>
- Copyright 2011 Bas van Dijk & Roel van Dijk
-
- Licensed under the GNU GPL version 3 or higher.
@@ -14,6 +14,7 @@ import Common
import Control.Concurrent
import System.Posix.Signals
+import Data.Time.Clock
#ifndef __ANDROID__
import System.Posix.Terminal
#endif
@@ -21,6 +22,8 @@ import System.Posix.Terminal
newtype Seconds = Seconds { fromSeconds :: Int }
deriving (Eq, Ord, Show)
+type Microseconds = Integer
+
{- Runs an action repeatedly forever, sleeping at least the specified number
- of seconds in between. -}
runEvery :: Seconds -> IO a -> IO a
@@ -30,8 +33,6 @@ runEvery n a = forever $ do
threadDelaySeconds :: Seconds -> IO ()
threadDelaySeconds (Seconds n) = unboundDelay (fromIntegral n * oneSecond)
- where
- oneSecond = 1000000 -- microseconds
{- Like threadDelay, but not bounded by an Int.
-
@@ -42,7 +43,7 @@ threadDelaySeconds (Seconds n) = unboundDelay (fromIntegral n * oneSecond)
- Taken from the unbounded-delay package to avoid a dependency for 4 lines
- of code.
-}
-unboundDelay :: Integer -> IO ()
+unboundDelay :: Microseconds -> IO ()
unboundDelay time = do
let maxWait = min time $ toInteger (maxBound :: Int)
threadDelay $ fromInteger maxWait
@@ -61,3 +62,6 @@ waitForTermination = do
where
check sig lock = void $
installHandler sig (CatchOnce $ putMVar lock ()) Nothing
+
+oneSecond :: Microseconds
+oneSecond = 1000000
diff --git a/debian/changelog b/debian/changelog
index 31cc06000..d02405938 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -42,6 +42,7 @@ git-annex (4.20130228) UNRELEASED; urgency=low
(See https://github.com/yesodweb/wai/issues/146)
* bugfix: drop --from an unavailable remote no longer updates the location
log, incorrectly, to say the remote does not have the key.
+ * assistant: Generate better commits for renames.
-- Joey Hess <joeyh@debian.org> Wed, 27 Feb 2013 23:20:40 -0400
diff --git a/doc/design/assistant/blog/day_210__spring.mdwn b/doc/design/assistant/blog/day_210__spring.mdwn
index 061110b08..ff34cfbfe 100644
--- a/doc/design/assistant/blog/day_210__spring.mdwn
+++ b/doc/design/assistant/blog/day_210__spring.mdwn
@@ -13,3 +13,17 @@ Fixed some problems around USB drives. One was a real jaw-dropping
bug: "git annex drop --from usbdrive" when the drive was not
connected still updated the location log to indicate it did not have
the file anymore! (Thank goodness for fsck..)
+
+I've noticed that moving around files in direct mode repos is inneficient,
+because the assistant re-checksums the "new" file. One way to avoid
+that would be to have a lookup table from (inode, size, mtime) to
+key, but I don't have one, and would like to avoid adding one.
+
+Instead, I have a cunning plan to deal with this heuristically. If the
+assistant can notice a file was removed and another file added at the same
+time, it can compare the (inode, size, mtime) to see if it's a rename, and
+avoid the checksum overhead.
+
+The first step to getting there was to make the assistant better at
+batching together delete+add events into a single rename commit. I'm happy
+to say I've accomplished that, with no perceptable delay to commits.