From b3256946457ec8a2da056573bf49593b225adbd8 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 11 Mar 2012 18:04:58 -0400 Subject: getKeysPresent is now fully lazy .. Allowing it to be used by things in constant space! Random statistics: git annex status has gone from taking 239 mb of memory and 26 seconds in a repo, to 8 mb and 13 seconds. The trick here is the unsafeInterleaveIO, and the form of the function's recursion, which I cribbed heavily from System.IO.HVFS.Utils.recurseDirStat. The difference is, this one goes to a limited depth and avoids statting everything. --- Annex/Content.hs | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'Annex/Content.hs') diff --git a/Annex/Content.hs b/Annex/Content.hs index bf5a6c3a7..ccaff5c56 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -27,6 +27,7 @@ module Annex.Content ( import Control.Exception (bracket_) import System.Posix.Types +import System.IO.Unsafe (unsafeInterleaveIO) import Common.Annex import Logs.Location @@ -290,19 +291,20 @@ moveBad key = do {- List of keys whose content exists in .git/annex/objects/ -} getKeysPresent :: Annex [Key] -getKeysPresent = getKeysPresent' =<< fromRepo gitAnnexObjectDir -getKeysPresent' :: FilePath -> Annex [Key] -getKeysPresent' dir = do - exists <- liftIO $ doesDirectoryExist dir - if not exists - then return [] - else liftIO $ do - -- 2 levels of hashing - levela <- dirContents dir - levelb <- mapM dirContents levela - contents <- unsafeInterleaveIO $ mapM dirContents (concat levelb) - let files = concat contents - return $ mapMaybe (fileKey . takeFileName) files +getKeysPresent = liftIO . traverse (2 :: Int) =<< fromRepo gitAnnexObjectDir + where + traverse depth dir = do + contents <- catchDefaultIO (dirContents dir) [] + if depth == 0 + then continue (mapMaybe (fileKey . takeFileName) contents) [] + else do + let deeper = traverse (depth - 1) + continue [] (map deeper contents) + continue keys [] = return keys + continue keys (a:as) = do + {- Force lazy traversal with unsafeInterleaveIO. -} + morekeys <- unsafeInterleaveIO a + continue (morekeys++keys) as {- Things to do to record changes to content when shutting down. - -- cgit v1.2.3