summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Annex.hs4
-rw-r--r--Annex/Queue.hs22
-rw-r--r--Git/Queue.hs32
-rw-r--r--debian/changelog2
-rw-r--r--doc/git-annex.mdwn8
5 files changed, 46 insertions, 22 deletions
diff --git a/Annex.hs b/Annex.hs
index 534415207..123c9facf 100644
--- a/Annex.hs
+++ b/Annex.hs
@@ -76,12 +76,12 @@ data AnnexState = AnnexState
{ repo :: Git.Repo
, backends :: [BackendA Annex]
, remotes :: [Types.Remote.RemoteA Annex]
- , repoqueue :: Git.Queue.Queue
, output :: OutputType
, force :: Bool
, fast :: Bool
, auto :: Bool
, branchstate :: BranchState
+ , repoqueue :: Maybe Git.Queue.Queue
, catfilehandle :: Maybe CatFileHandle
, checkattrhandle :: Maybe CheckAttrHandle
, forcebackend :: Maybe String
@@ -100,12 +100,12 @@ newState gitrepo = AnnexState
{ repo = gitrepo
, backends = []
, remotes = []
- , repoqueue = Git.Queue.new
, output = NormalOutput
, force = False
, fast = False
, auto = False
, branchstate = startBranchState
+ , repoqueue = Nothing
, catfilehandle = Nothing
, checkattrhandle = Nothing
, forcebackend = Nothing
diff --git a/Annex/Queue.hs b/Annex/Queue.hs
index f611cf02e..df6ba12a2 100644
--- a/Annex/Queue.hs
+++ b/Annex/Queue.hs
@@ -12,30 +12,42 @@ module Annex.Queue (
) where
import Common.Annex
-import Annex
+import Annex hiding (new)
import qualified Git.Queue
+import qualified Git.Config
{- Adds a git command to the queue. -}
add :: String -> [CommandParam] -> [FilePath] -> Annex ()
add command params files = do
- q <- getState repoqueue
+ q <- get
store $ Git.Queue.add q command params files
{- Runs the queue if it is full. Should be called periodically. -}
flushWhenFull :: Annex ()
flushWhenFull = do
- q <- getState repoqueue
+ q <- get
when (Git.Queue.full q) $ flush False
{- Runs (and empties) the queue. -}
flush :: Bool -> Annex ()
flush silent = do
- q <- getState repoqueue
+ q <- get
unless (0 == Git.Queue.size q) $ do
unless silent $
showSideAction "Recording state in git"
q' <- inRepo $ Git.Queue.flush q
store q'
+get :: Annex Git.Queue.Queue
+get = maybe new return =<< getState repoqueue
+
+new :: Annex Git.Queue.Queue
+new = do
+ q <- Git.Queue.new <$> fromRepo queuesize
+ store q
+ return q
+ where
+ queuesize r = readish =<< Git.Config.getMaybe "annex.queuesize" r
+
store :: Git.Queue.Queue -> Annex ()
-store q = changeState $ \s -> s { repoqueue = q }
+store q = changeState $ \s -> s { repoqueue = Just q }
diff --git a/Git/Queue.hs b/Git/Queue.hs
index c71605ad5..b8055ab44 100644
--- a/Git/Queue.hs
+++ b/Git/Queue.hs
@@ -5,13 +5,15 @@
- Licensed under the GNU GPL version 3 or higher.
-}
+{-# LANGUAGE BangPatterns #-}
+
module Git.Queue (
Queue,
new,
add,
size,
full,
- flush
+ flush,
) where
import qualified Data.Map as M
@@ -34,7 +36,11 @@ data Action = Action
{- A queue of actions to perform (in any order) on a git repository,
- with lists of files to perform them on. This allows coalescing
- similar git commands. -}
-data Queue = Queue Int (M.Map Action [FilePath])
+data Queue = Queue
+ { size :: Int
+ , _limit :: Int
+ , _items :: M.Map Action [FilePath]
+ }
deriving (Show, Eq)
{- A recommended maximum size for the queue, after which it should be
@@ -46,37 +52,33 @@ data Queue = Queue Int (M.Map Action [FilePath])
- above 20k, so this is a fairly good balance -- the queue will buffer
- only a few megabytes of stuff and a minimal number of commands will be
- run by xargs. -}
-maxSize :: Int
-maxSize = 10240
+defaultLimit :: Int
+defaultLimit = 10240
{- Constructor for empty queue. -}
-new :: Queue
-new = Queue 0 M.empty
+new :: Maybe Int -> Queue
+new lim = Queue 0 (fromMaybe defaultLimit lim) M.empty
{- Adds an action to a queue. -}
add :: Queue -> String -> [CommandParam] -> [FilePath] -> Queue
-add (Queue n m) subcommand params files = Queue (n + 1) m'
+add (Queue cur lim m) subcommand params files = Queue (cur + 1) lim m'
where
action = Action subcommand params
-- There are probably few items in the map, but there
-- can be a lot of files per item. So, optimise adding
-- files.
m' = M.insertWith' const action fs m
- fs = files ++ M.findWithDefault [] action m
-
-{- Number of items in a queue. -}
-size :: Queue -> Int
-size (Queue n _) = n
+ !fs = files ++ M.findWithDefault [] action m
{- Is a queue large enough that it should be flushed? -}
full :: Queue -> Bool
-full (Queue n _) = n > maxSize
+full (Queue cur lim _) = cur > lim
{- Runs a queue on a git repository. -}
flush :: Queue -> Repo -> IO Queue
-flush (Queue _ m) repo = do
+flush (Queue _ lim m) repo = do
forM_ (M.toList m) $ uncurry $ runAction repo
- return new
+ return $ Queue 0 lim M.empty
{- Runs an Action on a list of files in a git repository.
-
diff --git a/debian/changelog b/debian/changelog
index 9317a5291..28d253704 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -26,6 +26,8 @@ git-annex (3.20120124) UNRELEASED; urgency=low
due to lazy state update thunks when adding/fixing many files.
* Fixed some memory leaks that occurred when committing journal files.
* whereis: Prints the urls of files that the web special remote knows about.
+ * Added a annex.queuesize setting, useful when adding hundreds of thousands
+ of files on a system with plenty of memory.
-- Joey Hess <joeyh@debian.org> Tue, 24 Jan 2012 16:21:55 -0400
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 9232bf020..d4e62568f 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -576,6 +576,14 @@ Here are all the supported configuration settings.
The default reserve is 1 megabyte.
+* `annex.queuesize`
+
+ git-annex builds a queue of git commands, in order to combine similar
+ commands for speed. By default the size of the queue is limited to
+ 10240 commands; this can be used to change the size. If you have plenty
+ of memory and are working with very large numbers of files, increasing
+ the queue size can speed it up.
+
* `annex.version`
Automatically maintained, and used to automate upgrades between versions.