summaryrefslogtreecommitdiff
path: root/Annex
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-07-09 23:36:53 -0400
committerGravatar Joey Hess <joey@kitenet.net>2014-07-09 23:36:53 -0400
commit7523a9d447483f0e68d1bc1055df735389208f25 (patch)
tree4461054038f08f4782e3ac5ddb60ae1ba464ba33 /Annex
parentea27533db0f4949474750a089b77961355eb60b9 (diff)
Fix minor FD leak in journal code.
Minor because normally only 1 FD is leaked per git-annex run. However, the test suite leaks a few hundred FDs, and this broke it on the Debian autobuilders, which seem to have a tigher than usual ulimit. The leak was introduced by the lazy getDirectoryContents' that was introduced in b54de1dad4874b7561d2c5a345954b6b5c594078 in order to scale to millions of journal files -- if the lazy list was never fully consumed, the directory handle did not get closed. Instead, pull in openDirectory/readDirectory/closeDirectory code that I already developed and submitted in a patch to the haskell directory library earlier. Using this in journalDirty avoids the place that the lazy list caused a problem. And using it in stageJournal eliminates the need for getDirectoryContents'. The getJournalFiles* functions are switched back to using the regular strict getDirectoryContents. I'm not sure if those always consume the whole list, so this avoids any leak. And the things that call those are things like git annex unused, which also look at every file committed to the git-annex branch, so would need more work to scale to insane numbers of files anyway.
Diffstat (limited to 'Annex')
-rw-r--r--Annex/Branch.hs23
-rw-r--r--Annex/Journal.hs19
2 files changed, 31 insertions, 11 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index 5415876f8..a03d6ddf3 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -389,21 +389,26 @@ stageJournal jl = withIndex $ do
prepareModifyIndex jl
g <- gitRepo
let dir = gitAnnexJournalDir g
- fs <- getJournalFiles jl
(jlogf, jlogh) <- openjlog
- liftIO $ do
+ withJournalHandle $ \jh -> do
h <- hashObjectStart g
Git.UpdateIndex.streamUpdateIndex g
- [genstream dir h fs jlogh]
+ [genstream dir h jh jlogh]
hashObjectStop h
return $ cleanup dir jlogh jlogf
where
- genstream dir h fs jlogh streamer = forM_ fs $ \file -> do
- let path = dir </> file
- sha <- hashFile h path
- hPutStrLn jlogh file
- streamer $ Git.UpdateIndex.updateIndexLine
- sha FileBlob (asTopFilePath $ fileJournal file)
+ genstream dir h jh jlogh streamer = do
+ v <- readDirectory jh
+ case v of
+ Nothing -> return ()
+ Just file -> do
+ unless (dirCruft file) $ do
+ let path = dir </> file
+ sha <- hashFile h path
+ hPutStrLn jlogh file
+ streamer $ Git.UpdateIndex.updateIndexLine
+ sha FileBlob (asTopFilePath $ fileJournal file)
+ genstream dir h jh jlogh streamer
-- Clean up the staged files, as listed in the temp log file.
-- The temp file is used to avoid needing to buffer all the
-- filenames in memory.
diff --git a/Annex/Journal.hs b/Annex/Journal.hs
index 406155750..198388aa8 100644
--- a/Annex/Journal.hs
+++ b/Annex/Journal.hs
@@ -77,12 +77,27 @@ getJournalFilesStale :: Annex [FilePath]
getJournalFilesStale = do
g <- gitRepo
fs <- liftIO $ catchDefaultIO [] $
- getDirectoryContents' $ gitAnnexJournalDir g
+ getDirectoryContents $ gitAnnexJournalDir g
return $ filter (`notElem` [".", ".."]) fs
+withJournalHandle :: (DirectoryHandle -> IO a) -> Annex a
+withJournalHandle a = do
+ d <- fromRepo gitAnnexJournalDir
+ bracketIO (openDirectory d) closeDirectory (liftIO . a)
+
{- Checks if there are changes in the journal. -}
journalDirty :: Annex Bool
-journalDirty = not . null <$> getJournalFilesStale
+journalDirty = withJournalHandle go
+ where
+ go h = do
+ v <- readDirectory h
+ case v of
+ (Just f)
+ | not (dirCruft f) -> do
+ closeDirectory h
+ return True
+ | otherwise -> go h
+ Nothing -> return False
{- Produces a filename to use in the journal for a file on the branch.
-