diff options
author | Joey Hess <joey@kitenet.net> | 2014-07-04 15:28:07 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2014-07-04 15:28:07 -0400 |
commit | b54de1dad4874b7561d2c5a345954b6b5c594078 (patch) | |
tree | 38813b24b90673a3da5775ef998ca116f7ebcfd3 | |
parent | cabde6ee2c1574e04e127100c58b77fa776982a8 (diff) |
Fix memory leak when committing millions of changes to the git-annex branch
E.g., after git-annex add has run on 2 million files in one go.
Slightly unhappy with the need to use a temp file here, but I cannot see
any other alternative (see comments on the bug report).
This commit was sponsored by Hamish Coleman.
-rw-r--r-- | Annex/Branch.hs | 22 | ||||
-rw-r--r-- | debian/changelog | 2 | ||||
-rw-r--r-- | doc/bugs/runs_of_of_memory_adding_2_million_files.mdwn | 2 |
3 files changed, 23 insertions, 3 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 3443730d2..5415876f8 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -390,18 +390,34 @@ stageJournal jl = withIndex $ do g <- gitRepo let dir = gitAnnexJournalDir g fs <- getJournalFiles jl + (jlogf, jlogh) <- openjlog liftIO $ do h <- hashObjectStart g Git.UpdateIndex.streamUpdateIndex g - [genstream dir h fs] + [genstream dir h fs jlogh] hashObjectStop h - return $ liftIO $ mapM_ (removeFile . (dir </>)) fs + return $ cleanup dir jlogh jlogf where - genstream dir h fs streamer = forM_ fs $ \file -> do + genstream dir h fs jlogh streamer = forM_ fs $ \file -> do let path = dir </> file sha <- hashFile h path + hPutStrLn jlogh file streamer $ Git.UpdateIndex.updateIndexLine sha FileBlob (asTopFilePath $ fileJournal file) + -- Clean up the staged files, as listed in the temp log file. + -- The temp file is used to avoid needing to buffer all the + -- filenames in memory. + cleanup dir jlogh jlogf = do + hFlush jlogh + hSeek jlogh AbsoluteSeek 0 + stagedfs <- lines <$> hGetContents jlogh + mapM_ (removeFile . (dir </>)) stagedfs + hClose jlogh + nukeFile jlogf + openjlog = do + tmpdir <- fromRepo gitAnnexTmpMiscDir + createAnnexDirectory tmpdir + liftIO $ openTempFile tmpdir "jlog" {- This is run after the refs have been merged into the index, - but before the result is committed to the branch. diff --git a/debian/changelog b/debian/changelog index d08a08715..6ddb7b401 100644 --- a/debian/changelog +++ b/debian/changelog @@ -16,6 +16,8 @@ git-annex (5.20140614) UNRELEASED; urgency=medium * Android: patch git to avoid fchmod, which fails on /sdcard. * Support users who have set commit.gpgsign, by disabling gpg signatures for git-annex branch commits and commits made by the assistant. + * Fix memory leak when committing millions of changes to the git-annex + branch, eg after git-annex add has run on 2 million files in one go. 
-- Joey Hess <joeyh@debian.org> Mon, 16 Jun 2014 11:28:42 -0400 diff --git a/doc/bugs/runs_of_of_memory_adding_2_million_files.mdwn b/doc/bugs/runs_of_of_memory_adding_2_million_files.mdwn index a248d9489..3891933a6 100644 --- a/doc/bugs/runs_of_of_memory_adding_2_million_files.mdwn +++ b/doc/bugs/runs_of_of_memory_adding_2_million_files.mdwn @@ -13,3 +13,5 @@ add 999999 ok Stack space overflow: current size 8388608 bytes. Use `+RTS -Ksize -RTS' to increase it. </pre> + +> [[fixed|done]] --[[Joey]] |