summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar guilhem <guilhem@fripost.org>2013-08-26 02:47:49 +0200
committerGravatar Joey Hess <joey@kitenet.net>2013-08-25 21:02:13 -0400
commitdf48567279fa504ca31d9f4f25b06066f08d1128 (patch)
tree8b40f2d55abe2e74f870696db131536c43794116
parentb20ea840b89f7b1584b3cadcc195508ff824ad54 (diff)
Speed up the 'unused' command.
Instead of populating the second-level Bloom filter with every key referenced in every Git reference, consider only those which differ from what's referenced in the index. Incidentaly, unlike with its old behavior, staged modifications/deletion/... will now be detected by 'unused'. Credits to joeyh for the algorithm. :-)
-rw-r--r--Command/PreCommit.hs3
-rw-r--r--Command/Unused.hs33
-rw-r--r--Git/DiffTree.hs10
-rw-r--r--Utility/Monad.hs12
4 files changed, 32 insertions, 26 deletions
diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs
index 565344d25..c6d9dd278 100644
--- a/Command/PreCommit.hs
+++ b/Command/PreCommit.hs
@@ -12,6 +12,7 @@ import Command
import qualified Command.Add
import qualified Command.Fix
import qualified Git.DiffTree
+import qualified Git.Ref
import Annex.CatFile
import Annex.Content.Direct
import Git.Sha
@@ -38,7 +39,7 @@ startIndirect file = next $ do
startDirect :: [String] -> CommandStart
startDirect _ = next $ do
- (diffs, clean) <- inRepo $ Git.DiffTree.diffIndex
+ (diffs, clean) <- inRepo $ Git.DiffTree.diffIndex Git.Ref.headRef
forM_ diffs go
next $ liftIO clean
where
diff --git a/Command/Unused.hs b/Command/Unused.hs
index 7c43cbc6f..50fdf0da2 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -21,7 +21,6 @@ import Common.Annex
import Command
import Logs.Unused
import Annex.Content
-import Utility.FileMode
import Logs.Location
import Logs.Transfer
import qualified Annex
@@ -29,7 +28,7 @@ import qualified Git
import qualified Git.Command
import qualified Git.Ref
import qualified Git.LsFiles as LsFiles
-import qualified Git.LsTree as LsTree
+import qualified Git.DiffTree as DiffTree
import qualified Backend
import qualified Remote
import qualified Annex.Branch
@@ -255,35 +254,31 @@ withKeysReferenced' mdir initial a = do
withKeysReferencedInGit :: (Key -> Annex ()) -> Annex ()
withKeysReferencedInGit a = do
- rs <- relevantrefs <$> showref
- forM_ rs (withKeysReferencedInGitRef a)
+ showref >>= mapM_ (withKeysReferencedInGitRef a) . relevantrefs
where
showref = inRepo $ Git.Command.pipeReadStrict [Param "show-ref"]
- relevantrefs = map (Git.Ref . snd) .
+ relevantrefs = map (Git.Ref . snd) .
nubBy uniqref .
filter ourbranches .
- map (separate (== ' ')) . lines
+ map (separate (== ' ')) .
+ lines
uniqref (x, _) (y, _) = x == y
ourbranchend = '/' : show Annex.Branch.name
ourbranches (_, b) = not (ourbranchend `isSuffixOf` b)
&& not ("refs/synced/" `isPrefixOf` b)
+{- Runs an action on keys referenced in the given Git reference which
+ - differ from those referenced in the index. -}
withKeysReferencedInGitRef :: (Key -> Annex ()) -> Git.Ref -> Annex ()
withKeysReferencedInGitRef a ref = do
showAction $ "checking " ++ Git.Ref.describe ref
- go <=< inRepo $ LsTree.lsTree ref
- where
- go [] = noop
- go (l:ls)
- | isSymLink (LsTree.mode l) = do
- content <- encodeW8 . L.unpack
- <$> catFile ref (LsTree.file l)
- case fileKey (takeFileName content) of
- Nothing -> go ls
- Just k -> do
- a k
- go ls
- | otherwise = go ls
+ (ts,clean) <- inRepo $ DiffTree.diffIndex ref
+ -- if 'dstsha' is 0{40}, the key will be Nothing
+ forM_ ts $ catObject . DiffTree.dstsha >=>
+ encodeW8 . L.unpack *>=>
+ fileKey . takeFileName *>=>
+ maybe noop a
+ liftIO $ void clean
{- Looks in the specified directory for bad/tmp keys, and returns a list
- of those that might still have value, or might be stale and removable.
diff --git a/Git/DiffTree.hs b/Git/DiffTree.hs
index cf8a37600..8f85fcc34 100644
--- a/Git/DiffTree.hs
+++ b/Git/DiffTree.hs
@@ -41,14 +41,14 @@ diffTreeRecursive :: Ref -> Ref -> Repo -> IO ([DiffTreeItem], IO Bool)
diffTreeRecursive src dst = getdiff (Param "diff-tree")
[Param "-r", Param (show src), Param (show dst)]
-{- Diffs between the repository and index. Does nothing if there is not
- - yet a commit in the repository. -}
-diffIndex :: Repo -> IO ([DiffTreeItem], IO Bool)
-diffIndex repo = do
+{- Diffs between a tree and the index. Does nothing if there is not yet a
+ - commit in the repository. -}
+diffIndex :: Ref -> Repo -> IO ([DiffTreeItem], IO Bool)
+diffIndex ref repo = do
ifM (Git.Ref.headExists repo)
( getdiff (Param "diff-index")
[ Param "--cached"
- , Param $ show Git.Ref.headRef
+ , Param $ show ref
] repo
, return ([], return True)
)
diff --git a/Utility/Monad.hs b/Utility/Monad.hs
index b66419f76..4f5a6d244 100644
--- a/Utility/Monad.hs
+++ b/Utility/Monad.hs
@@ -8,7 +8,7 @@
module Utility.Monad where
import Data.Maybe
-import Control.Monad (liftM)
+import Control.Monad
{- Return the first value from a list, if any, satisfying the given
- predicate -}
@@ -53,6 +53,16 @@ ma <&&> mb = ifM ma ( mb , return False )
infixr 3 <&&>
infixr 2 <||>
+{- Left-to-right Kleisli composition with a pure left/right hand side. -}
+(*>=>) :: Monad m => (a -> b) -> (b -> m c) -> (a -> m c)
+f *>=> g = return . f >=> g
+
+(>=*>) :: Monad m => (a -> m b) -> (b -> c) -> (a -> m c)
+f >=*> g = f >=> return . g
+
+{- Same fixity as >=> and <=< -}
+infixr 1 *>=>, >=*>
+
{- Runs an action, passing its value to an observer before returning it. -}
observe :: Monad m => (a -> m b) -> m a -> m a
observe observer a = do