summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-04-07 13:59:31 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-04-07 13:59:31 -0400
commitbc51387e6dd426f46f9ab0ef23e6e3eefe7a4417 (patch)
tree9627f60c81d1852b731ea57171f4b36887847e9b
parent77f45e4e45d45a08bfe1bec210909345adb6f6d8 (diff)
Periodically flush git command queue, to avoid boating memory usage too much.
Since the queue is flushed in between subcommand actions being run, there should be no issues with actions that expect to queue up some stuff and have it run after they do other stuff. So I didn't have to audit for such assumptions.
-rw-r--r--Annex.hs29
-rw-r--r--AnnexQueue.hs47
-rw-r--r--CmdLine.hs13
-rw-r--r--Command/Add.hs4
-rw-r--r--Command/Fix.hs4
-rw-r--r--Command/FromKey.hs4
-rw-r--r--Command/Move.hs3
-rw-r--r--Command/PreCommit.hs3
-rw-r--r--Command/Unannex.hs3
-rw-r--r--Content.hs3
-rw-r--r--GitQueue.hs29
-rw-r--r--Remote/Git.hs3
-rw-r--r--Upgrade/V1.hs13
-rw-r--r--debian/changelog2
14 files changed, 101 insertions, 59 deletions
diff --git a/Annex.hs b/Annex.hs
index 2723c6a00..f4e5d599d 100644
--- a/Annex.hs
+++ b/Annex.hs
@@ -13,10 +13,7 @@ module Annex (
eval,
getState,
changeState,
- gitRepo,
- queue,
- queueRun,
- queueRunAt,
+ gitRepo
) where
import Control.Monad.State
@@ -25,7 +22,6 @@ import qualified GitRepo as Git
import qualified GitQueue
import qualified BackendClass
import qualified RemoteClass
-import Utility
-- git-annex's monad
type Annex = StateT AnnexState IO
@@ -93,26 +89,3 @@ changeState a = do
{- Returns the git repository being acted on -}
gitRepo :: Annex Git.Repo
gitRepo = getState repo
-
-{- Adds a git command to the queue. -}
-queue :: String -> [CommandParam] -> FilePath -> Annex ()
-queue command params file = do
- state <- get
- let q = repoqueue state
- put state { repoqueue = GitQueue.add q command params file }
-
-{- Runs (and empties) the queue. -}
-queueRun :: Annex ()
-queueRun = do
- state <- get
- let q = repoqueue state
- g <- gitRepo
- liftIO $ GitQueue.run g q
- put state { repoqueue = GitQueue.empty }
-
-{- Runs the queue if the specified number of items have been queued. -}
-queueRunAt :: Integer -> Annex ()
-queueRunAt n = do
- state <- get
- let q = repoqueue state
- when (GitQueue.size q >= n) queueRun
diff --git a/AnnexQueue.hs b/AnnexQueue.hs
new file mode 100644
index 000000000..58e77a6e8
--- /dev/null
+++ b/AnnexQueue.hs
@@ -0,0 +1,47 @@
+{- git-annex command queue
+ -
+ - Copyright 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module AnnexQueue (
+ add,
+ flush,
+ flushWhenFull
+) where
+
+import Control.Monad.State (liftIO)
+import Control.Monad (when, unless)
+
+import Annex
+import Messages
+import qualified GitQueue
+import Utility
+
+{- Adds a git command to the queue, possibly running previously queued
+ - actions if enough have accumulated. -}
+add :: String -> [CommandParam] -> FilePath -> Annex ()
+add command params file = do
+ q <- getState repoqueue
+ store $ GitQueue.add q command params file
+
+{- Runs the queue if it is full. Should be called periodically. -}
+flushWhenFull :: Annex ()
+flushWhenFull = do
+ q <- getState repoqueue
+ when (GitQueue.full q) $ flush False
+
+{- Runs (and empties) the queue. -}
+flush :: Bool -> Annex ()
+flush silent = do
+ q <- getState repoqueue
+ unless (0 == GitQueue.size q) $ do
+ unless silent $
+ showSideAction "Recording state in git..."
+ g <- gitRepo
+ q' <- liftIO $ GitQueue.flush g q
+ store q'
+
+store :: GitQueue.Queue -> Annex ()
+store q = changeState $ \s -> s { repoqueue = q }
diff --git a/CmdLine.hs b/CmdLine.hs
index de03d96ed..684ebf979 100644
--- a/CmdLine.hs
+++ b/CmdLine.hs
@@ -14,11 +14,11 @@ module CmdLine (
import System.IO.Error (try)
import System.Console.GetOpt
import Control.Monad.State (liftIO)
-import Control.Monad (when, unless)
+import Control.Monad (when)
import qualified Annex
+import qualified AnnexQueue
import qualified GitRepo as Git
-import qualified GitQueue
import Types
import Command
import BackendList
@@ -81,7 +81,9 @@ tryRun :: Annex.AnnexState -> [Annex Bool] -> IO ()
tryRun state actions = tryRun' state 0 actions
tryRun' :: Annex.AnnexState -> Integer -> [Annex Bool] -> IO ()
tryRun' state errnum (a:as) = do
- result <- try $ Annex.run state a
+ result <- try $ Annex.run state $ do
+ AnnexQueue.flushWhenFull
+ a
case result of
Left err -> do
Annex.eval state $ showErr err
@@ -100,10 +102,7 @@ startup = do
{- Cleanup actions. -}
shutdown :: Annex Bool
shutdown = do
- q <- Annex.getState Annex.repoqueue
- unless (0 == GitQueue.size q) $ do
- showSideAction "Recording state in git..."
- Annex.queueRun
+ AnnexQueue.flush False
liftIO $ Git.reap
diff --git a/Command/Add.hs b/Command/Add.hs
index da98bffa4..b532ab045 100644
--- a/Command/Add.hs
+++ b/Command/Add.hs
@@ -11,7 +11,7 @@ import Control.Monad.State (liftIO)
import System.Posix.Files
import Command
-import qualified Annex
+import qualified AnnexQueue
import qualified Backend
import LocationLog
import Types
@@ -60,5 +60,5 @@ cleanup file key = do
let mtime = modificationTime s
liftIO $ touch file (TimeSpec mtime) False
- Annex.queue "add" [Param "--"] file
+ AnnexQueue.add "add" [Param "--"] file
return True
diff --git a/Command/Fix.hs b/Command/Fix.hs
index 513e07a31..d898ce517 100644
--- a/Command/Fix.hs
+++ b/Command/Fix.hs
@@ -12,7 +12,7 @@ import System.Posix.Files
import System.Directory
import Command
-import qualified Annex
+import qualified AnnexQueue
import Utility
import Content
import Messages
@@ -44,5 +44,5 @@ perform file link = do
cleanup :: FilePath -> CommandCleanup
cleanup file = do
- Annex.queue "add" [Param "--"] file
+ AnnexQueue.add "add" [Param "--"] file
return True
diff --git a/Command/FromKey.hs b/Command/FromKey.hs
index 8c1a1028f..eadaa13e1 100644
--- a/Command/FromKey.hs
+++ b/Command/FromKey.hs
@@ -13,7 +13,7 @@ import System.Directory
import Control.Monad (unless)
import Command
-import qualified Annex
+import qualified AnnexQueue
import Utility
import qualified Backend
import Content
@@ -46,5 +46,5 @@ perform file = do
cleanup :: FilePath -> CommandCleanup
cleanup file = do
- Annex.queue "add" [Param "--"] file
+ AnnexQueue.add "add" [Param "--"] file
return True
diff --git a/Command/Move.hs b/Command/Move.hs
index 951695d66..e5e78d249 100644
--- a/Command/Move.hs
+++ b/Command/Move.hs
@@ -12,6 +12,7 @@ import Control.Monad.State (liftIO)
import Command
import qualified Command.Drop
import qualified Annex
+import qualified AnnexQueue
import LocationLog
import Types
import Content
@@ -59,7 +60,7 @@ remoteHasKey remote key present = do
g <- Annex.gitRepo
let remoteuuid = Remote.uuid remote
logfile <- liftIO $ logChange g key remoteuuid status
- Annex.queue "add" [Param "--"] logfile
+ AnnexQueue.add "add" [Param "--"] logfile
where
status = if present then ValuePresent else ValueMissing
diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs
index 727a63728..1db40f75f 100644
--- a/Command/PreCommit.hs
+++ b/Command/PreCommit.hs
@@ -11,6 +11,7 @@ import Control.Monad.State (liftIO)
import Command
import qualified Annex
+import qualified AnnexQueue
import qualified GitRepo as Git
import qualified Command.Add
import qualified Command.Fix
@@ -42,5 +43,5 @@ cleanup file = do
-- stage the symlink
g <- Annex.gitRepo
liftIO $ Git.run g "reset" [Params "-q --", File file]
- Annex.queueRun
+ AnnexQueue.flush True
return True
diff --git a/Command/Unannex.hs b/Command/Unannex.hs
index b0ce31cee..94db500c6 100644
--- a/Command/Unannex.hs
+++ b/Command/Unannex.hs
@@ -13,6 +13,7 @@ import System.Directory
import Command
import qualified Annex
+import qualified AnnexQueue
import Utility
import qualified Backend
import LocationLog
@@ -68,6 +69,6 @@ cleanup file key = do
-- Commit staged changes at end to avoid confusing the
-- pre-commit hook if this file is later added back to
-- git as a normal, non-annexed file.
- Annex.queue "commit" [Params "-a -m", Param "content removed from git annex"] "-a"
+ AnnexQueue.add "commit" [Params "-a -m", Param "content removed from git annex"] "-a"
return True
diff --git a/Content.hs b/Content.hs
index ba265c930..f63c02311 100644
--- a/Content.hs
+++ b/Content.hs
@@ -36,6 +36,7 @@ import LocationLog
import UUID
import qualified GitRepo as Git
import qualified Annex
+import qualified AnnexQueue
import Utility
import StatFS
import Key
@@ -72,7 +73,7 @@ logStatus key status = do
unless (Git.repoIsLocalBare g) $ do
u <- getUUID g
logfile <- liftIO $ logChange g key u status
- Annex.queue "add" [Param "--"] logfile
+ AnnexQueue.add "add" [Param "--"] logfile
{- Runs an action, passing it a temporary filename to download,
- and if the action succeeds, moves the temp file into
diff --git a/GitQueue.hs b/GitQueue.hs
index dfe2976da..480027fa0 100644
--- a/GitQueue.hs
+++ b/GitQueue.hs
@@ -10,7 +10,8 @@ module GitQueue (
empty,
add,
size,
- run
+ full,
+ flush
) where
import qualified Data.Map as M
@@ -32,9 +33,21 @@ data Action = Action {
{- A queue of actions to perform (in any order) on a git repository,
- with lists of files to perform them on. This allows coalescing
- similar git commands. -}
-data Queue = Queue Integer (M.Map Action [FilePath])
+data Queue = Queue Int (M.Map Action [FilePath])
deriving (Show, Eq)
+{- A recommended maximum size for the queue, after which it should be
+ - run.
+ -
+ - 10240 is semi-arbitrary. If we assume git filenames are between 10 and
+ - 255 characters long, then the queue will build up between 100kb and
+ - 2550kb long commands. The max command line length on linux is somewhere
+ - above 20k, so this is a fairly good balance -- the queue will buffer
+ - only a few megabytes of stuff and a minimal number of commands will be
+ - run by xargs. -}
+maxSize :: Int
+maxSize = 10240
+
{- Constructor for empty queue. -}
empty :: Queue
empty = Queue 0 M.empty
@@ -47,14 +60,18 @@ add (Queue n m) subcommand params file = Queue (n + 1) m'
m' = M.insertWith' (++) action [file] m
{- Number of items in a queue. -}
-size :: Queue -> Integer
+size :: Queue -> Int
size (Queue n _) = n
+{- Is a queue large enough that it should be flushed? -}
+full :: Queue -> Bool
+full (Queue n _) = n > maxSize
+
{- Runs a queue on a git repository. -}
-run :: Git.Repo -> Queue -> IO ()
-run repo (Queue _ m) = do
+flush :: Git.Repo -> Queue -> IO Queue
+flush repo (Queue _ m) = do
forM_ (M.toList m) $ uncurry $ runAction repo
- return ()
+ return empty
{- Runs an Action on a list of files in a git repository.
-
diff --git a/Remote/Git.hs b/Remote/Git.hs
index a45845510..2936beaf7 100644
--- a/Remote/Git.hs
+++ b/Remote/Git.hs
@@ -19,6 +19,7 @@ import RemoteClass
import Types
import qualified GitRepo as Git
import qualified Annex
+import qualified AnnexQueue
import Locations
import UUID
import Utility
@@ -150,7 +151,7 @@ copyToRemote r key
Annex.eval a $ do
ok <- Content.getViaTmp key $
\f -> liftIO $ copyFile keysrc f
- Annex.queueRun
+ AnnexQueue.flush True
return ok
| Git.repoIsSsh r = do
g <- Annex.gitRepo
diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs
index 4ce2612d6..9278bce60 100644
--- a/Upgrade/V1.hs
+++ b/Upgrade/V1.hs
@@ -24,6 +24,7 @@ import Types
import Locations
import LocationLog
import qualified Annex
+import qualified AnnexQueue
import qualified GitRepo as Git
import Backend
import Messages
@@ -68,7 +69,7 @@ upgrade = do
updateSymlinks
moveLocationLogs
- Annex.queueRun
+ AnnexQueue.flush True
setVersion
-- add new line to auto-merge hashed location logs
@@ -106,8 +107,7 @@ updateSymlinks = do
link <- calcGitLink f k
liftIO $ removeFile f
liftIO $ createSymbolicLink link f
- Annex.queue "add" [Param "--"] f
- Annex.queueRunAt 10240
+ AnnexQueue.add "add" [Param "--"] f
moveLocationLogs :: Annex ()
moveLocationLogs = do
@@ -137,10 +137,9 @@ moveLocationLogs = do
old <- liftIO $ readLog f
new <- liftIO $ readLog dest
liftIO $ writeLog dest (old++new)
- Annex.queue "add" [Param "--"] dest
- Annex.queue "add" [Param "--"] f
- Annex.queue "rm" [Param "--quiet", Param "-f", Param "--"] f
- Annex.queueRunAt 10240
+ AnnexQueue.add "add" [Param "--"] dest
+ AnnexQueue.add "add" [Param "--"] f
+ AnnexQueue.add "rm" [Param "--quiet", Param "-f", Param "--"] f
oldlog2key :: FilePath -> Maybe (FilePath, Key)
oldlog2key l =
diff --git a/debian/changelog b/debian/changelog
index 6ccb9eac9..fdc740cb8 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -12,6 +12,8 @@ git-annex (0.20110402) UNRELEASED; urgency=low
* Add build depend on perlmagick so docs are consistently built.
Closes: #621410
* Add doc-base file. Closes: #621408
+ * Periodically flush git command queue, to avoid boating memory usage
+ too much.
-- Joey Hess <joeyh@debian.org> Sat, 02 Apr 2011 13:45:54 -0400