diff options
-rw-r--r-- | Annex.hs | 4 | ||||
-rw-r--r-- | Annex/Queue.hs | 22 | ||||
-rw-r--r-- | Git/Queue.hs | 32 | ||||
-rw-r--r-- | debian/changelog | 2 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 8 |
5 files changed, 46 insertions, 22 deletions
@@ -76,12 +76,12 @@ data AnnexState = AnnexState { repo :: Git.Repo , backends :: [BackendA Annex] , remotes :: [Types.Remote.RemoteA Annex] - , repoqueue :: Git.Queue.Queue , output :: OutputType , force :: Bool , fast :: Bool , auto :: Bool , branchstate :: BranchState + , repoqueue :: Maybe Git.Queue.Queue , catfilehandle :: Maybe CatFileHandle , checkattrhandle :: Maybe CheckAttrHandle , forcebackend :: Maybe String @@ -100,12 +100,12 @@ newState gitrepo = AnnexState { repo = gitrepo , backends = [] , remotes = [] - , repoqueue = Git.Queue.new , output = NormalOutput , force = False , fast = False , auto = False , branchstate = startBranchState + , repoqueue = Nothing , catfilehandle = Nothing , checkattrhandle = Nothing , forcebackend = Nothing diff --git a/Annex/Queue.hs b/Annex/Queue.hs index f611cf02e..df6ba12a2 100644 --- a/Annex/Queue.hs +++ b/Annex/Queue.hs @@ -12,30 +12,42 @@ module Annex.Queue ( ) where import Common.Annex -import Annex +import Annex hiding (new) import qualified Git.Queue +import qualified Git.Config {- Adds a git command to the queue. -} add :: String -> [CommandParam] -> [FilePath] -> Annex () add command params files = do - q <- getState repoqueue + q <- get store $ Git.Queue.add q command params files {- Runs the queue if it is full. Should be called periodically. -} flushWhenFull :: Annex () flushWhenFull = do - q <- getState repoqueue + q <- get when (Git.Queue.full q) $ flush False {- Runs (and empties) the queue. -} flush :: Bool -> Annex () flush silent = do - q <- getState repoqueue + q <- get unless (0 == Git.Queue.size q) $ do unless silent $ showSideAction "Recording state in git" q' <- inRepo $ Git.Queue.flush q store q' +get :: Annex Git.Queue.Queue +get = maybe new return =<< getState repoqueue + +new :: Annex Git.Queue.Queue +new = do + q <- Git.Queue.new <$> fromRepo queuesize + store q + return q + where + queuesize r = readish =<< Git.Config.getMaybe "annex.queuesize" r + store :: Git.Queue.Queue -> Annex () -store q = changeState $ \s -> s { repoqueue = q } +store q = changeState $ \s -> s { repoqueue = Just q } diff --git a/Git/Queue.hs b/Git/Queue.hs index c71605ad5..b8055ab44 100644 --- a/Git/Queue.hs +++ b/Git/Queue.hs @@ -5,13 +5,15 @@ - Licensed under the GNU GPL version 3 or higher. -} +{-# LANGUAGE BangPatterns #-} + module Git.Queue ( Queue, new, add, size, full, - flush + flush, ) where import qualified Data.Map as M @@ -34,7 +36,11 @@ data Action = Action {- A queue of actions to perform (in any order) on a git repository, - with lists of files to perform them on. This allows coalescing - similar git commands. -} -data Queue = Queue Int (M.Map Action [FilePath]) +data Queue = Queue + { size :: Int + , _limit :: Int + , _items :: M.Map Action [FilePath] + } deriving (Show, Eq) {- A recommended maximum size for the queue, after which it should be @@ -46,37 +52,33 @@ data Queue = Queue Int (M.Map Action [FilePath]) - above 20k, so this is a fairly good balance -- the queue will buffer - only a few megabytes of stuff and a minimal number of commands will be - run by xargs. -} -maxSize :: Int -maxSize = 10240 +defaultLimit :: Int +defaultLimit = 10240 {- Constructor for empty queue. -} -new :: Queue -new = Queue 0 M.empty +new :: Maybe Int -> Queue +new lim = Queue 0 (fromMaybe defaultLimit lim) M.empty {- Adds an action to a queue. -} add :: Queue -> String -> [CommandParam] -> [FilePath] -> Queue -add (Queue n m) subcommand params files = Queue (n + 1) m' +add (Queue cur lim m) subcommand params files = Queue (cur + 1) lim m' where action = Action subcommand params -- There are probably few items in the map, but there -- can be a lot of files per item. So, optimise adding -- files. m' = M.insertWith' const action fs m - fs = files ++ M.findWithDefault [] action m - -{- Number of items in a queue. -} -size :: Queue -> Int -size (Queue n _) = n + !fs = files ++ M.findWithDefault [] action m {- Is a queue large enough that it should be flushed? -} full :: Queue -> Bool -full (Queue n _) = n > maxSize +full (Queue cur lim _) = cur > lim {- Runs a queue on a git repository. -} flush :: Queue -> Repo -> IO Queue -flush (Queue _ m) repo = do +flush (Queue _ lim m) repo = do forM_ (M.toList m) $ uncurry $ runAction repo - return new + return $ Queue 0 lim M.empty {- Runs an Action on a list of files in a git repository. - diff --git a/debian/changelog b/debian/changelog index 9317a5291..28d253704 100644 --- a/debian/changelog +++ b/debian/changelog @@ -26,6 +26,8 @@ git-annex (3.20120124) UNRELEASED; urgency=low due to lazy state update thunks when adding/fixing many files. * Fixed some memory leaks that occurred when committing journal files. * whereis: Prints the urls of files that the web special remote knows about. + * Added a annex.queuesize setting, useful when adding hundreds of thousands + of files on a system with plenty of memory. -- Joey Hess <joeyh@debian.org> Tue, 24 Jan 2012 16:21:55 -0400 diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 9232bf020..d4e62568f 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -576,6 +576,14 @@ Here are all the supported configuration settings. The default reserve is 1 megabyte. +* `annex.queuesize` + + git-annex builds a queue of git commands, in order to combine similar + commands for speed. By default the size of the queue is limited to + 10240 commands; this can be used to change the size. If you have plenty + of memory and are working with very large numbers of files, increasing + the queue size can speed it up. + * `annex.version` Automatically maintained, and used to automate upgrades between versions. |