summaryrefslogtreecommitdiff
path: root/Command
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2012-03-11 17:15:58 -0400
committerGravatar Joey Hess <joey@kitenet.net>2012-03-11 17:15:58 -0400
commitff3644ad38d210c5ce0ebfb5a2cf5e84bb3b47da (patch)
tree8c9638d417204ad7dc2a0b3c0406dd631a0e51a1 /Command
parentb086e32c63a4932fc5916bedae7abe0690da4eb0 (diff)
status: Fixed to run in nearly constant space.
Before, it leaked space due to caching lists of keys. Now all necessary data about keys is calculated as they stream in. The "nearly constant" is due to getKeysPresent, which builds up a lot of [] thunks as it traverses .git/annex/objects/. Will deal with it later.
Diffstat (limited to 'Command')
-rw-r--r--Command/Status.hs94
-rw-r--r--Command/Unused.hs19
2 files changed, 65 insertions, 48 deletions
diff --git a/Command/Status.hs b/Command/Status.hs
index dfe847bb8..0b1741dc0 100644
--- a/Command/Status.hs
+++ b/Command/Status.hs
@@ -5,12 +5,12 @@
- Licensed under the GNU GPL version 3 or higher.
-}
+{-# LANGUAGE BangPatterns #-}
+
module Command.Status where
import Control.Monad.State.Strict
import qualified Data.Map as M
-import qualified Data.Set as S
-import Data.Set (Set)
import Text.JSON
import Common.Annex
@@ -32,10 +32,18 @@ import Remote
-- a named computation that produces a statistic
type Stat = StatState (Maybe (String, StatState String))
--- cached info that multiple Stats may need
+-- data about a set of keys
+data KeyData = KeyData
+ { countKeys :: Integer
+ , sizeKeys :: Integer
+ , unknownSizeKeys :: Integer
+ , backendsKeys :: M.Map String Integer
+ }
+
+-- cached info that multiple Stats use
data StatInfo = StatInfo
- { keysPresentCache :: Maybe (Set Key)
- , keysReferencedCache :: Maybe (Set Key)
+ { presentData :: Maybe KeyData
+ , referencedData :: Maybe KeyData
}
-- a state monad for running Stats in
@@ -122,19 +130,19 @@ remote_list level desc = stat n $ nojson $ lift $ do
local_annex_size :: Stat
local_annex_size = stat "local annex size" $ json id $
- keySizeSum <$> cachedKeysPresent
+ showSizeKeys <$> cachedPresentData
local_annex_keys :: Stat
local_annex_keys = stat "local annex keys" $ json show $
- S.size <$> cachedKeysPresent
+ countKeys <$> cachedPresentData
known_annex_size :: Stat
known_annex_size = stat "known annex size" $ json id $
- keySizeSum <$> cachedKeysReferenced
+ showSizeKeys <$> cachedReferencedData
known_annex_keys :: Stat
known_annex_keys = stat "known annex keys" $ json show $
- S.size <$> cachedKeysReferenced
+ countKeys <$> cachedReferencedData
tmp_size :: Stat
tmp_size = staleSize "temporary directory size" gitAnnexTmpDir
@@ -144,46 +152,62 @@ bad_data_size = staleSize "bad keys size" gitAnnexBadDir
backend_usage :: Stat
backend_usage = stat "backend usage" $ nojson $
- calc <$> cachedKeysReferenced <*> cachedKeysPresent
+ calc
+ <$> (backendsKeys <$> cachedReferencedData)
+ <*> (backendsKeys <$> cachedPresentData)
where
- calc a b = pp "" $ reverse . sort $ map swap $ splits $ S.toList $ S.union a b
- splits :: [Key] -> [(String, Integer)]
- splits ks = M.toList $ M.fromListWith (+) $ map tcount ks
- tcount k = (keyBackendName k, 1)
- swap (a, b) = (b, a)
+ calc a b = pp "" $ reverse . sort $ map swap $ M.toList $ M.unionWith (+) a b
pp c [] = c
pp c ((n, b):xs) = "\n\t" ++ b ++ ": " ++ show n ++ pp c xs
+ swap (a, b) = (b, a)
-cachedKeysPresent :: StatState (Set Key)
-cachedKeysPresent = do
+cachedPresentData :: StatState KeyData
+cachedPresentData = do
s <- get
- case keysPresentCache s of
+ case presentData s of
Just v -> return v
Nothing -> do
- keys <- S.fromList <$> lift getKeysPresent
- put s { keysPresentCache = Just keys }
- return keys
+ v <- foldKeys <$> lift getKeysPresent
+ put s { presentData = Just v }
+ return v
-cachedKeysReferenced :: StatState (Set Key)
-cachedKeysReferenced = do
+cachedReferencedData :: StatState KeyData
+cachedReferencedData = do
s <- get
- case keysReferencedCache s of
+ case referencedData s of
Just v -> return v
Nothing -> do
- keys <- S.fromList <$> lift Command.Unused.getKeysReferenced
- put s { keysReferencedCache = Just keys }
- return keys
+ !v <- lift $ Command.Unused.withKeysReferenced
+ emptyKeyData addKey
+ put s { referencedData = Just v }
+ return v
+
+emptyKeyData :: KeyData
+emptyKeyData = KeyData 0 0 0 M.empty
-keySizeSum :: Set Key -> String
-keySizeSum s = total ++ missingnote
+foldKeys :: [Key] -> KeyData
+foldKeys = foldl' (flip addKey) emptyKeyData
+
+addKey :: Key -> KeyData -> KeyData
+addKey key (KeyData count size unknownsize backends) =
+ KeyData count' size' unknownsize' backends'
+ where
+ {- All calculations strict to avoid thunks when repeatedly
+ - applied to many keys. -}
+ !count' = count + 1
+ !backends' = M.insertWith' (+) (keyBackendName key) 1 backends
+ !size' = maybe size (+ size) ks
+ !unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks
+ ks = keySize key
+
+showSizeKeys :: KeyData -> String
+showSizeKeys d = total ++ missingnote
where
- knownsizes = mapMaybe keySize $ S.toList s
- total = roughSize storageUnits False $ sum knownsizes
- missing = S.size s - genericLength knownsizes
+ total = roughSize storageUnits False $ sizeKeys d
missingnote
- | missing == 0 = ""
+ | unknownSizeKeys d == 0 = ""
| otherwise = aside $
- "+ " ++ show missing ++
+ "+ " ++ show (unknownSizeKeys d) ++
" keys of unknown size"
staleSize :: String -> (Git.Repo -> FilePath) -> Stat
@@ -192,7 +216,7 @@ staleSize label dirspec = do
if null keys
then nostat
else stat label $ json (++ aside "clean up with git-annex unused") $
- return $ keySizeSum $ S.fromList keys
+ return $ showSizeKeys $ foldKeys keys
aside :: String -> String
aside s = " (" ++ s ++ ")"
diff --git a/Command/Unused.hs b/Command/Unused.hs
index ba14bfc4a..69b58c5e7 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -1,6 +1,6 @@
{- git-annex command
-
- - Copyright 2010-2011 Joey Hess <joey@kitenet.net>
+ - Copyright 2010-2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -171,7 +171,7 @@ excludeReferenced l = do
go s (r:rs)
| s == S.empty = return [] -- optimisation
| otherwise = do
- !s' <- withKeysReferencedInGit r s S.delete
+ s' <- withKeysReferencedInGit r s S.delete
go s' rs
{- Finds items in the first, smaller list, that are not
@@ -186,21 +186,14 @@ exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
where
remove a b = foldl (flip S.delete) b a
-{- List of keys referenced by symlinks in the git repo. -}
-getKeysReferenced :: Annex [Key]
-getKeysReferenced = do
- top <- fromRepo Git.workTree
- files <- inRepo $ LsFiles.inRepo [top]
- keypairs <- mapM Backend.lookupFile files
- return $ map fst $ catMaybes keypairs
-
{- Given an initial value, mutates it using an action for each
- key referenced by symlinks in the git repo. -}
withKeysReferenced :: v -> (Key -> v -> v) -> Annex v
-withKeysReferenced initial a = do
- top <- fromRepo Git.workTree
- go initial =<< inRepo (LsFiles.inRepo [top])
+withKeysReferenced initial a = go initial =<< files
where
+ files = do
+ top <- fromRepo Git.workTree
+ inRepo $ LsFiles.inRepo [top]
go v [] = return v
go v (f:fs) = do
x <- Backend.lookupFile f