diff options
author | Joey Hess <joey@kitenet.net> | 2011-04-07 13:59:31 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2011-04-07 13:59:31 -0400 |
commit | bc51387e6dd426f46f9ab0ef23e6e3eefe7a4417 (patch) | |
tree | 9627f60c81d1852b731ea57171f4b36887847e9b | |
parent | 77f45e4e45d45a08bfe1bec210909345adb6f6d8 (diff) |
Periodically flush git command queue, to avoid boating memory usage too much.
Since the queue is flushed in between subcommand actions being run,
there should be no issues with actions that expect to queue up some stuff
and have it run after they do other stuff. So I didn't have to audit for
such assumptions.
-rw-r--r-- | Annex.hs | 29 | ||||
-rw-r--r-- | AnnexQueue.hs | 47 | ||||
-rw-r--r-- | CmdLine.hs | 13 | ||||
-rw-r--r-- | Command/Add.hs | 4 | ||||
-rw-r--r-- | Command/Fix.hs | 4 | ||||
-rw-r--r-- | Command/FromKey.hs | 4 | ||||
-rw-r--r-- | Command/Move.hs | 3 | ||||
-rw-r--r-- | Command/PreCommit.hs | 3 | ||||
-rw-r--r-- | Command/Unannex.hs | 3 | ||||
-rw-r--r-- | Content.hs | 3 | ||||
-rw-r--r-- | GitQueue.hs | 29 | ||||
-rw-r--r-- | Remote/Git.hs | 3 | ||||
-rw-r--r-- | Upgrade/V1.hs | 13 | ||||
-rw-r--r-- | debian/changelog | 2 |
14 files changed, 101 insertions, 59 deletions
@@ -13,10 +13,7 @@ module Annex ( eval, getState, changeState, - gitRepo, - queue, - queueRun, - queueRunAt, + gitRepo ) where import Control.Monad.State @@ -25,7 +22,6 @@ import qualified GitRepo as Git import qualified GitQueue import qualified BackendClass import qualified RemoteClass -import Utility -- git-annex's monad type Annex = StateT AnnexState IO @@ -93,26 +89,3 @@ changeState a = do {- Returns the git repository being acted on -} gitRepo :: Annex Git.Repo gitRepo = getState repo - -{- Adds a git command to the queue. -} -queue :: String -> [CommandParam] -> FilePath -> Annex () -queue command params file = do - state <- get - let q = repoqueue state - put state { repoqueue = GitQueue.add q command params file } - -{- Runs (and empties) the queue. -} -queueRun :: Annex () -queueRun = do - state <- get - let q = repoqueue state - g <- gitRepo - liftIO $ GitQueue.run g q - put state { repoqueue = GitQueue.empty } - -{- Runs the queue if the specified number of items have been queued. -} -queueRunAt :: Integer -> Annex () -queueRunAt n = do - state <- get - let q = repoqueue state - when (GitQueue.size q >= n) queueRun diff --git a/AnnexQueue.hs b/AnnexQueue.hs new file mode 100644 index 000000000..58e77a6e8 --- /dev/null +++ b/AnnexQueue.hs @@ -0,0 +1,47 @@ +{- git-annex command queue + - + - Copyright 2011 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module AnnexQueue ( + add, + flush, + flushWhenFull +) where + +import Control.Monad.State (liftIO) +import Control.Monad (when, unless) + +import Annex +import Messages +import qualified GitQueue +import Utility + +{- Adds a git command to the queue, possibly running previously queued + - actions if enough have accumulated. -} +add :: String -> [CommandParam] -> FilePath -> Annex () +add command params file = do + q <- getState repoqueue + store $ GitQueue.add q command params file + +{- Runs the queue if it is full. Should be called periodically. -} +flushWhenFull :: Annex () +flushWhenFull = do + q <- getState repoqueue + when (GitQueue.full q) $ flush False + +{- Runs (and empties) the queue. -} +flush :: Bool -> Annex () +flush silent = do + q <- getState repoqueue + unless (0 == GitQueue.size q) $ do + unless silent $ + showSideAction "Recording state in git..." + g <- gitRepo + q' <- liftIO $ GitQueue.flush g q + store q' + +store :: GitQueue.Queue -> Annex () +store q = changeState $ \s -> s { repoqueue = q } diff --git a/CmdLine.hs b/CmdLine.hs index de03d96ed..684ebf979 100644 --- a/CmdLine.hs +++ b/CmdLine.hs @@ -14,11 +14,11 @@ module CmdLine ( import System.IO.Error (try) import System.Console.GetOpt import Control.Monad.State (liftIO) -import Control.Monad (when, unless) +import Control.Monad (when) import qualified Annex +import qualified AnnexQueue import qualified GitRepo as Git -import qualified GitQueue import Types import Command import BackendList @@ -81,7 +81,9 @@ tryRun :: Annex.AnnexState -> [Annex Bool] -> IO () tryRun state actions = tryRun' state 0 actions tryRun' :: Annex.AnnexState -> Integer -> [Annex Bool] -> IO () tryRun' state errnum (a:as) = do - result <- try $ Annex.run state a + result <- try $ Annex.run state $ do + AnnexQueue.flushWhenFull + a case result of Left err -> do Annex.eval state $ showErr err @@ -100,10 +102,7 @@ startup = do {- Cleanup actions. -} shutdown :: Annex Bool shutdown = do - q <- Annex.getState Annex.repoqueue - unless (0 == GitQueue.size q) $ do - showSideAction "Recording state in git..." - Annex.queueRun + AnnexQueue.flush False liftIO $ Git.reap diff --git a/Command/Add.hs b/Command/Add.hs index da98bffa4..b532ab045 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -11,7 +11,7 @@ import Control.Monad.State (liftIO) import System.Posix.Files import Command -import qualified Annex +import qualified AnnexQueue import qualified Backend import LocationLog import Types @@ -60,5 +60,5 @@ cleanup file key = do let mtime = modificationTime s liftIO $ touch file (TimeSpec mtime) False - Annex.queue "add" [Param "--"] file + AnnexQueue.add "add" [Param "--"] file return True diff --git a/Command/Fix.hs b/Command/Fix.hs index 513e07a31..d898ce517 100644 --- a/Command/Fix.hs +++ b/Command/Fix.hs @@ -12,7 +12,7 @@ import System.Posix.Files import System.Directory import Command -import qualified Annex +import qualified AnnexQueue import Utility import Content import Messages @@ -44,5 +44,5 @@ perform file link = do cleanup :: FilePath -> CommandCleanup cleanup file = do - Annex.queue "add" [Param "--"] file + AnnexQueue.add "add" [Param "--"] file return True diff --git a/Command/FromKey.hs b/Command/FromKey.hs index 8c1a1028f..eadaa13e1 100644 --- a/Command/FromKey.hs +++ b/Command/FromKey.hs @@ -13,7 +13,7 @@ import System.Directory import Control.Monad (unless) import Command -import qualified Annex +import qualified AnnexQueue import Utility import qualified Backend import Content @@ -46,5 +46,5 @@ perform file = do cleanup :: FilePath -> CommandCleanup cleanup file = do - Annex.queue "add" [Param "--"] file + AnnexQueue.add "add" [Param "--"] file return True diff --git a/Command/Move.hs b/Command/Move.hs index 951695d66..e5e78d249 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -12,6 +12,7 @@ import Control.Monad.State (liftIO) import Command import qualified Command.Drop import qualified Annex +import qualified AnnexQueue import LocationLog import Types import Content @@ -59,7 +60,7 @@ remoteHasKey remote key present = do g <- Annex.gitRepo let remoteuuid = Remote.uuid remote logfile <- liftIO $ logChange g key remoteuuid status - Annex.queue "add" [Param "--"] logfile + AnnexQueue.add "add" [Param "--"] logfile where status = if present then ValuePresent else ValueMissing diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index 727a63728..1db40f75f 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -11,6 +11,7 @@ import Control.Monad.State (liftIO) import Command import qualified Annex +import qualified AnnexQueue import qualified GitRepo as Git import qualified Command.Add import qualified Command.Fix @@ -42,5 +43,5 @@ cleanup file = do -- stage the symlink g <- Annex.gitRepo liftIO $ Git.run g "reset" [Params "-q --", File file] - Annex.queueRun + AnnexQueue.flush True return True diff --git a/Command/Unannex.hs b/Command/Unannex.hs index b0ce31cee..94db500c6 100644 --- a/Command/Unannex.hs +++ b/Command/Unannex.hs @@ -13,6 +13,7 @@ import System.Directory import Command import qualified Annex +import qualified AnnexQueue import Utility import qualified Backend import LocationLog @@ -68,6 +69,6 @@ cleanup file key = do -- Commit staged changes at end to avoid confusing the -- pre-commit hook if this file is later added back to -- git as a normal, non-annexed file. - Annex.queue "commit" [Params "-a -m", Param "content removed from git annex"] "-a" + AnnexQueue.add "commit" [Params "-a -m", Param "content removed from git annex"] "-a" return True diff --git a/Content.hs b/Content.hs index ba265c930..f63c02311 100644 --- a/Content.hs +++ b/Content.hs @@ -36,6 +36,7 @@ import LocationLog import UUID import qualified GitRepo as Git import qualified Annex +import qualified AnnexQueue import Utility import StatFS import Key @@ -72,7 +73,7 @@ logStatus key status = do unless (Git.repoIsLocalBare g) $ do u <- getUUID g logfile <- liftIO $ logChange g key u status - Annex.queue "add" [Param "--"] logfile + AnnexQueue.add "add" [Param "--"] logfile {- Runs an action, passing it a temporary filename to download, - and if the action succeeds, moves the temp file into diff --git a/GitQueue.hs b/GitQueue.hs index dfe2976da..480027fa0 100644 --- a/GitQueue.hs +++ b/GitQueue.hs @@ -10,7 +10,8 @@ module GitQueue ( empty, add, size, - run + full, + flush ) where import qualified Data.Map as M @@ -32,9 +33,21 @@ data Action = Action { {- A queue of actions to perform (in any order) on a git repository, - with lists of files to perform them on. This allows coalescing - similar git commands. -} -data Queue = Queue Integer (M.Map Action [FilePath]) +data Queue = Queue Int (M.Map Action [FilePath]) deriving (Show, Eq) +{- A recommended maximum size for the queue, after which it should be + - run. + - + - 10240 is semi-arbitrary. If we assume git filenames are between 10 and + - 255 characters long, then the queue will build up between 100kb and + - 2550kb long commands. The max command line length on linux is somewhere + - above 20k, so this is a fairly good balance -- the queue will buffer + - only a few megabytes of stuff and a minimal number of commands will be + - run by xargs. -} +maxSize :: Int +maxSize = 10240 + {- Constructor for empty queue. -} empty :: Queue empty = Queue 0 M.empty @@ -47,14 +60,18 @@ add (Queue n m) subcommand params file = Queue (n + 1) m' m' = M.insertWith' (++) action [file] m {- Number of items in a queue. -} -size :: Queue -> Integer +size :: Queue -> Int size (Queue n _) = n +{- Is a queue large enough that it should be flushed? -} +full :: Queue -> Bool +full (Queue n _) = n > maxSize + {- Runs a queue on a git repository. -} -run :: Git.Repo -> Queue -> IO () -run repo (Queue _ m) = do +flush :: Git.Repo -> Queue -> IO Queue +flush repo (Queue _ m) = do forM_ (M.toList m) $ uncurry $ runAction repo - return () + return empty {- Runs an Action on a list of files in a git repository. - diff --git a/Remote/Git.hs b/Remote/Git.hs index a45845510..2936beaf7 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -19,6 +19,7 @@ import RemoteClass import Types import qualified GitRepo as Git import qualified Annex +import qualified AnnexQueue import Locations import UUID import Utility @@ -150,7 +151,7 @@ copyToRemote r key Annex.eval a $ do ok <- Content.getViaTmp key $ \f -> liftIO $ copyFile keysrc f - Annex.queueRun + AnnexQueue.flush True return ok | Git.repoIsSsh r = do g <- Annex.gitRepo diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs index 4ce2612d6..9278bce60 100644 --- a/Upgrade/V1.hs +++ b/Upgrade/V1.hs @@ -24,6 +24,7 @@ import Types import Locations import LocationLog import qualified Annex +import qualified AnnexQueue import qualified GitRepo as Git import Backend import Messages @@ -68,7 +69,7 @@ upgrade = do updateSymlinks moveLocationLogs - Annex.queueRun + AnnexQueue.flush True setVersion -- add new line to auto-merge hashed location logs @@ -106,8 +107,7 @@ updateSymlinks = do link <- calcGitLink f k liftIO $ removeFile f liftIO $ createSymbolicLink link f - Annex.queue "add" [Param "--"] f - Annex.queueRunAt 10240 + AnnexQueue.add "add" [Param "--"] f moveLocationLogs :: Annex () moveLocationLogs = do @@ -137,10 +137,9 @@ moveLocationLogs = do old <- liftIO $ readLog f new <- liftIO $ readLog dest liftIO $ writeLog dest (old++new) - Annex.queue "add" [Param "--"] dest - Annex.queue "add" [Param "--"] f - Annex.queue "rm" [Param "--quiet", Param "-f", Param "--"] f - Annex.queueRunAt 10240 + AnnexQueue.add "add" [Param "--"] dest + AnnexQueue.add "add" [Param "--"] f + AnnexQueue.add "rm" [Param "--quiet", Param "-f", Param "--"] f oldlog2key :: FilePath -> Maybe (FilePath, Key) oldlog2key l = diff --git a/debian/changelog b/debian/changelog index 6ccb9eac9..fdc740cb8 100644 --- a/debian/changelog +++ b/debian/changelog @@ -12,6 +12,8 @@ git-annex (0.20110402) UNRELEASED; urgency=low * Add build depend on perlmagick so docs are consistently built. Closes: #621410 * Add doc-base file. Closes: #621408 + * Periodically flush git command queue, to avoid boating memory usage + too much. -- Joey Hess <joeyh@debian.org> Sat, 02 Apr 2011 13:45:54 -0400 |