summaryrefslogtreecommitdiff
path: root/Command
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2012-03-11 15:19:07 -0400
committerGravatar Joey Hess <joey@kitenet.net>2012-03-11 16:24:07 -0400
commitb086e32c63a4932fc5916bedae7abe0690da4eb0 (patch)
treee3c59401831ef04602151b18e3cfcc10c8887881 /Command
parenta13949bf3794a33248611aca9ef821a6088b1b3a (diff)
unused: Reduce memory usage significantly.
Much of the memory bloat turned out to be due to getKeysReferenced containing a mapM, which is strict and buffered the whole list rather than streaming it. The other half of the bloat was due to building a temporary Set in order to call S.difference. While that is more cpu efficient, I switched to successive S.delete, since with it, I can run a whole git annex unused in less than 8 mb of memory. The whole Set of keys with content available is still stored in memory, so running unused in a repo with a whole lot of file content will still use more memory. In a repo containing 6000 files, it needed 40 mb. Note that the status command still uses the bloatful getKeysReferenced.
Diffstat (limited to 'Command')
-rw-r--r--Command/Unused.hs50
1 files changed, 33 insertions, 17 deletions
diff --git a/Command/Unused.hs b/Command/Unused.hs
index bbef835f7..ba14bfc4a 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -155,9 +155,9 @@ unusedKeys = do
excludeReferenced :: [Key] -> Annex [Key]
excludeReferenced [] = return [] -- optimisation
excludeReferenced l = do
- c <- inRepo $ Git.Command.pipeRead [Param "show-ref"]
- removewith (getKeysReferenced : map getKeysReferencedInGit (refs c))
- (S.fromList l)
+ let s = S.fromList l
+ !s' <- withKeysReferenced s S.delete
+ go s' =<< refs <$> (inRepo $ Git.Command.pipeRead [Param "show-ref"])
where
-- Skip the git-annex branches, and get all other unique refs.
refs = map (Git.Ref . snd) .
@@ -167,13 +167,12 @@ excludeReferenced l = do
uniqref (a, _) (b, _) = a == b
ourbranchend = '/' : show Annex.Branch.name
ourbranches (_, b) = not $ ourbranchend `isSuffixOf` b
- removewith [] s = return $ S.toList s
- removewith (a:as) s
+ go s [] = return $ S.toList s
+ go s (r:rs)
| s == S.empty = return [] -- optimisation
| otherwise = do
- referenced <- a
- let !s' = s `S.difference` S.fromList referenced
- removewith as s'
+ !s' <- withKeysReferencedInGit r s S.delete
+ go s' rs
{- Finds items in the first, smaller list, that are not
- present in the second, larger list.
@@ -195,20 +194,37 @@ getKeysReferenced = do
keypairs <- mapM Backend.lookupFile files
return $ map fst $ catMaybes keypairs
-{- List of keys referenced by symlinks in a git ref. -}
-getKeysReferencedInGit :: Git.Ref -> Annex [Key]
-getKeysReferencedInGit ref = do
+{- Given an initial value, mutates it using an action for each
+ - key referenced by symlinks in the git repo. -}
+withKeysReferenced :: v -> (Key -> v -> v) -> Annex v
+withKeysReferenced initial a = do
+ top <- fromRepo Git.workTree
+ go initial =<< inRepo (LsFiles.inRepo [top])
+ where
+ go v [] = return v
+ go v (f:fs) = do
+ x <- Backend.lookupFile f
+ case x of
+ Nothing -> go v fs
+ Just (k, _) -> do
+ let !v' = a k v
+ go v' fs
+
+withKeysReferencedInGit :: Git.Ref -> v -> (Key -> v -> v) -> Annex v
+withKeysReferencedInGit ref initial a = do
showAction $ "checking " ++ Git.Ref.describe ref
- findkeys [] =<< inRepo (LsTree.lsTree ref)
+ go initial =<< inRepo (LsTree.lsTree ref)
where
- findkeys c [] = return c
- findkeys c (l:ls)
+ go v [] = return v
+ go v (l:ls)
| isSymLink (LsTree.mode l) = do
content <- L.decodeUtf8 <$> catFile ref (LsTree.file l)
case fileKey (takeFileName $ L.unpack content) of
- Nothing -> findkeys c ls
- Just k -> findkeys (k:c) ls
- | otherwise = findkeys c ls
+ Nothing -> go v ls
+ Just k -> do
+ let !v' = a k v
+ go v' ls
+ | otherwise = go v ls
{- Looks in the specified directory for bad/tmp keys, and returns a list
- of those that might still have value, or might be stale and removable.