summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-07-04 15:28:07 -0400
committerGravatar Joey Hess <joey@kitenet.net>2014-07-04 15:28:07 -0400
commitb54de1dad4874b7561d2c5a345954b6b5c594078 (patch)
tree38813b24b90673a3da5775ef998ca116f7ebcfd3
parentcabde6ee2c1574e04e127100c58b77fa776982a8 (diff)
Fix memory leak when committing millions of changes to the git-annex branch
E.g. after git-annex add has run on 2 million files in one go. Slightly unhappy with the need to use a temp file here, but I cannot see any other alternative (see comments on the bug report). This commit was sponsored by Hamish Coleman.
-rw-r--r--Annex/Branch.hs22
-rw-r--r--debian/changelog2
-rw-r--r--doc/bugs/runs_of_of_memory_adding_2_million_files.mdwn2
3 files changed, 23 insertions, 3 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index 3443730d2..5415876f8 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -390,18 +390,34 @@ stageJournal jl = withIndex $ do
g <- gitRepo
let dir = gitAnnexJournalDir g
fs <- getJournalFiles jl
+ (jlogf, jlogh) <- openjlog
liftIO $ do
h <- hashObjectStart g
Git.UpdateIndex.streamUpdateIndex g
- [genstream dir h fs]
+ [genstream dir h fs jlogh]
hashObjectStop h
- return $ liftIO $ mapM_ (removeFile . (dir </>)) fs
+ return $ cleanup dir jlogh jlogf
where
- genstream dir h fs streamer = forM_ fs $ \file -> do
+ genstream dir h fs jlogh streamer = forM_ fs $ \file -> do
let path = dir </> file
sha <- hashFile h path
+ hPutStrLn jlogh file
streamer $ Git.UpdateIndex.updateIndexLine
sha FileBlob (asTopFilePath $ fileJournal file)
+ -- Clean up the staged files, as listed in the temp log file.
+ -- The temp file is used to avoid needing to buffer all the
+ -- filenames in memory.
+ cleanup dir jlogh jlogf = do
+ hFlush jlogh
+ hSeek jlogh AbsoluteSeek 0
+ stagedfs <- lines <$> hGetContents jlogh
+ mapM_ (removeFile . (dir </>)) stagedfs
+ hClose jlogh
+ nukeFile jlogf
+ openjlog = do
+ tmpdir <- fromRepo gitAnnexTmpMiscDir
+ createAnnexDirectory tmpdir
+ liftIO $ openTempFile tmpdir "jlog"
{- This is run after the refs have been merged into the index,
- but before the result is committed to the branch.
diff --git a/debian/changelog b/debian/changelog
index d08a08715..6ddb7b401 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -16,6 +16,8 @@ git-annex (5.20140614) UNRELEASED; urgency=medium
* Android: patch git to avoid fchmod, which fails on /sdcard.
* Support users who have set commit.gpgsign, by disabling gpg signatures
for git-annex branch commits and commits made by the assistant.
+ * Fix memory leak when committing millions of changes to the git-annex
+ branch, eg after git-annex add has run on 2 million files in one go.
-- Joey Hess <joeyh@debian.org> Mon, 16 Jun 2014 11:28:42 -0400
diff --git a/doc/bugs/runs_of_of_memory_adding_2_million_files.mdwn b/doc/bugs/runs_of_of_memory_adding_2_million_files.mdwn
index a248d9489..3891933a6 100644
--- a/doc/bugs/runs_of_of_memory_adding_2_million_files.mdwn
+++ b/doc/bugs/runs_of_of_memory_adding_2_million_files.mdwn
@@ -13,3 +13,5 @@ add 999999 ok
Stack space overflow: current size 8388608 bytes.
Use `+RTS -Ksize -RTS' to increase it.
</pre>
+
+> [[fixed|done]] --[[Joey]]