summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2012-03-11 18:04:58 -0400
committerGravatar Joey Hess <joey@kitenet.net>2012-03-11 18:04:58 -0400
commitb3256946457ec8a2da056573bf49593b225adbd8 (patch)
treefb13abe0a14baefd0dbed5ccbc87052f4909d27a
parentff3644ad38d210c5ce0ebfb5a2cf5e84bb3b47da (diff)
getKeysPresent is now fully lazy
.. Allowing it to be used by things in constant space! Random statistics: git annex status has gone from taking 239 mb of memory and 26 seconds in a repo, to 8 mb and 13 seconds. The trick here is the unsafeInterleaveIO, and the form of the function's recursion, which I cribbed heavily from System.IO.HVFS.Utils.recurseDirStat. The difference is, this one goes to a limited depth and avoids statting everything.
-rw-r--r--Annex/Content.hs28
-rw-r--r--debian/changelog2
2 files changed, 16 insertions, 14 deletions
diff --git a/Annex/Content.hs b/Annex/Content.hs
index bf5a6c3a7..ccaff5c56 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -27,6 +27,7 @@ module Annex.Content (
import Control.Exception (bracket_)
import System.Posix.Types
+import System.IO.Unsafe (unsafeInterleaveIO)
import Common.Annex
import Logs.Location
@@ -290,19 +291,20 @@ moveBad key = do
{- List of keys whose content exists in .git/annex/objects/ -}
getKeysPresent :: Annex [Key]
-getKeysPresent = getKeysPresent' =<< fromRepo gitAnnexObjectDir
-getKeysPresent' :: FilePath -> Annex [Key]
-getKeysPresent' dir = do
- exists <- liftIO $ doesDirectoryExist dir
- if not exists
- then return []
- else liftIO $ do
- -- 2 levels of hashing
- levela <- dirContents dir
- levelb <- mapM dirContents levela
- contents <- unsafeInterleaveIO $ mapM dirContents (concat levelb)
- let files = concat contents
- return $ mapMaybe (fileKey . takeFileName) files
+getKeysPresent = liftIO . traverse (2 :: Int) =<< fromRepo gitAnnexObjectDir
+ where
+ traverse depth dir = do
+ contents <- catchDefaultIO (dirContents dir) []
+ if depth == 0
+ then continue (mapMaybe (fileKey . takeFileName) contents) []
+ else do
+ let deeper = traverse (depth - 1)
+ continue [] (map deeper contents)
+ continue keys [] = return keys
+ continue keys (a:as) = do
+ {- Force lazy traversal with unsafeInterleaveIO. -}
+ morekeys <- unsafeInterleaveIO a
+ continue (morekeys++keys) as
{- Things to do to record changes to content when shutting down.
-
diff --git a/debian/changelog b/debian/changelog
index 6da54056c..120513806 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -6,7 +6,7 @@ git-annex (3.20120310) UNRELEASED; urgency=low
* unused: Reduce memory usage significantly. Still not constant
space, but now only needs to store the set of file contents that
are present in the annex in memory.
- * status: Fixed to run in nearly constant space.
+ * status: Fixed to run in constant space.
-- Joey Hess <joeyh@debian.org> Sat, 10 Mar 2012 14:03:22 -0400