diff options
author | guilhem <guilhem@fripost.org> | 2013-08-26 02:47:49 +0200 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2013-08-25 21:02:13 -0400 |
commit | df48567279fa504ca31d9f4f25b06066f08d1128 (patch) | |
tree | 8b40f2d55abe2e74f870696db131536c43794116 | |
parent | b20ea840b89f7b1584b3cadcc195508ff824ad54 (diff) |
Speed up the 'unused' command.
Instead of populating the second-level Bloom filter with every key
referenced in every Git reference, consider only those which differ
from what's referenced in the index.
Incidentaly, unlike with its old behavior, staged
modifications/deletion/... will now be detected by 'unused'.
Credits to joeyh for the algorithm. :-)
-rw-r--r-- | Command/PreCommit.hs | 3 | ||||
-rw-r--r-- | Command/Unused.hs | 33 | ||||
-rw-r--r-- | Git/DiffTree.hs | 10 | ||||
-rw-r--r-- | Utility/Monad.hs | 12 |
4 files changed, 32 insertions, 26 deletions
diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index 565344d25..c6d9dd278 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -12,6 +12,7 @@ import Command import qualified Command.Add import qualified Command.Fix import qualified Git.DiffTree +import qualified Git.Ref import Annex.CatFile import Annex.Content.Direct import Git.Sha @@ -38,7 +39,7 @@ startIndirect file = next $ do startDirect :: [String] -> CommandStart startDirect _ = next $ do - (diffs, clean) <- inRepo $ Git.DiffTree.diffIndex + (diffs, clean) <- inRepo $ Git.DiffTree.diffIndex Git.Ref.headRef forM_ diffs go next $ liftIO clean where diff --git a/Command/Unused.hs b/Command/Unused.hs index 7c43cbc6f..50fdf0da2 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -21,7 +21,6 @@ import Common.Annex import Command import Logs.Unused import Annex.Content -import Utility.FileMode import Logs.Location import Logs.Transfer import qualified Annex @@ -29,7 +28,7 @@ import qualified Git import qualified Git.Command import qualified Git.Ref import qualified Git.LsFiles as LsFiles -import qualified Git.LsTree as LsTree +import qualified Git.DiffTree as DiffTree import qualified Backend import qualified Remote import qualified Annex.Branch @@ -255,35 +254,31 @@ withKeysReferenced' mdir initial a = do withKeysReferencedInGit :: (Key -> Annex ()) -> Annex () withKeysReferencedInGit a = do - rs <- relevantrefs <$> showref - forM_ rs (withKeysReferencedInGitRef a) + showref >>= mapM_ (withKeysReferencedInGitRef a) . relevantrefs where showref = inRepo $ Git.Command.pipeReadStrict [Param "show-ref"] - relevantrefs = map (Git.Ref . snd) . + relevantrefs = map (Git.Ref . snd) . nubBy uniqref . filter ourbranches . - map (separate (== ' ')) . lines + map (separate (== ' ')) . + lines uniqref (x, _) (y, _) = x == y ourbranchend = '/' : show Annex.Branch.name ourbranches (_, b) = not (ourbranchend `isSuffixOf` b) && not ("refs/synced/" `isPrefixOf` b) +{- Runs an action on keys referenced in the given Git reference which + - differ from those referenced in the index. -} withKeysReferencedInGitRef :: (Key -> Annex ()) -> Git.Ref -> Annex () withKeysReferencedInGitRef a ref = do showAction $ "checking " ++ Git.Ref.describe ref - go <=< inRepo $ LsTree.lsTree ref - where - go [] = noop - go (l:ls) - | isSymLink (LsTree.mode l) = do - content <- encodeW8 . L.unpack - <$> catFile ref (LsTree.file l) - case fileKey (takeFileName content) of - Nothing -> go ls - Just k -> do - a k - go ls - | otherwise = go ls + (ts,clean) <- inRepo $ DiffTree.diffIndex ref + -- if 'dstsha' is 0{40}, the key will be Nothing + forM_ ts $ catObject . DiffTree.dstsha >=> + encodeW8 . L.unpack *>=> + fileKey . takeFileName *>=> + maybe noop a + liftIO $ void clean {- Looks in the specified directory for bad/tmp keys, and returns a list - of those that might still have value, or might be stale and removable. diff --git a/Git/DiffTree.hs b/Git/DiffTree.hs index cf8a37600..8f85fcc34 100644 --- a/Git/DiffTree.hs +++ b/Git/DiffTree.hs @@ -41,14 +41,14 @@ diffTreeRecursive :: Ref -> Ref -> Repo -> IO ([DiffTreeItem], IO Bool) diffTreeRecursive src dst = getdiff (Param "diff-tree") [Param "-r", Param (show src), Param (show dst)] -{- Diffs between the repository and index. Does nothing if there is not - - yet a commit in the repository. -} -diffIndex :: Repo -> IO ([DiffTreeItem], IO Bool) -diffIndex repo = do +{- Diffs between a tree and the index. Does nothing if there is not yet a + - commit in the repository. -} +diffIndex :: Ref -> Repo -> IO ([DiffTreeItem], IO Bool) +diffIndex ref repo = do ifM (Git.Ref.headExists repo) ( getdiff (Param "diff-index") [ Param "--cached" - , Param $ show Git.Ref.headRef + , Param $ show ref ] repo , return ([], return True) ) diff --git a/Utility/Monad.hs b/Utility/Monad.hs index b66419f76..4f5a6d244 100644 --- a/Utility/Monad.hs +++ b/Utility/Monad.hs @@ -8,7 +8,7 @@ module Utility.Monad where import Data.Maybe -import Control.Monad (liftM) +import Control.Monad {- Return the first value from a list, if any, satisfying the given - predicate -} @@ -53,6 +53,16 @@ ma <&&> mb = ifM ma ( mb , return False ) infixr 3 <&&> infixr 2 <||> +{- Left-to-right Kleisli composition with a pure left/right hand side. -} +(*>=>) :: Monad m => (a -> b) -> (b -> m c) -> (a -> m c) +f *>=> g = return . f >=> g + +(>=*>) :: Monad m => (a -> m b) -> (b -> c) -> (a -> m c) +f >=*> g = f >=> return . g + +{- Same fixity as >=> and <=< -} +infixr 1 *>=>, >=*> + {- Runs an action, passing its value to an observer before returning it. -} observe :: Monad m => (a -> m b) -> m a -> m a observe observer a = do |