summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Annex/Content.hs2
-rw-r--r--Command/Status.hs94
-rw-r--r--Command/Unused.hs19
-rw-r--r--debian/changelog1
4 files changed, 67 insertions, 49 deletions
diff --git a/Annex/Content.hs b/Annex/Content.hs
index fdd03f320..bf5a6c3a7 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -300,7 +300,7 @@ getKeysPresent' dir = do
-- 2 levels of hashing
levela <- dirContents dir
levelb <- mapM dirContents levela
- contents <- mapM dirContents (concat levelb)
+ contents <- unsafeInterleaveIO $ mapM dirContents (concat levelb)
let files = concat contents
return $ mapMaybe (fileKey . takeFileName) files
diff --git a/Command/Status.hs b/Command/Status.hs
index dfe847bb8..0b1741dc0 100644
--- a/Command/Status.hs
+++ b/Command/Status.hs
@@ -5,12 +5,12 @@
- Licensed under the GNU GPL version 3 or higher.
-}
+{-# LANGUAGE BangPatterns #-}
+
module Command.Status where
import Control.Monad.State.Strict
import qualified Data.Map as M
-import qualified Data.Set as S
-import Data.Set (Set)
import Text.JSON
import Common.Annex
@@ -32,10 +32,18 @@ import Remote
-- a named computation that produces a statistic
type Stat = StatState (Maybe (String, StatState String))
--- cached info that multiple Stats may need
+-- data about a set of keys
+data KeyData = KeyData
+ { countKeys :: Integer
+ , sizeKeys :: Integer
+ , unknownSizeKeys :: Integer
+ , backendsKeys :: M.Map String Integer
+ }
+
+-- cached info that multiple Stats use
data StatInfo = StatInfo
- { keysPresentCache :: Maybe (Set Key)
- , keysReferencedCache :: Maybe (Set Key)
+ { presentData :: Maybe KeyData
+ , referencedData :: Maybe KeyData
}
-- a state monad for running Stats in
@@ -122,19 +130,19 @@ remote_list level desc = stat n $ nojson $ lift $ do
local_annex_size :: Stat
local_annex_size = stat "local annex size" $ json id $
- keySizeSum <$> cachedKeysPresent
+ showSizeKeys <$> cachedPresentData
local_annex_keys :: Stat
local_annex_keys = stat "local annex keys" $ json show $
- S.size <$> cachedKeysPresent
+ countKeys <$> cachedPresentData
known_annex_size :: Stat
known_annex_size = stat "known annex size" $ json id $
- keySizeSum <$> cachedKeysReferenced
+ showSizeKeys <$> cachedReferencedData
known_annex_keys :: Stat
known_annex_keys = stat "known annex keys" $ json show $
- S.size <$> cachedKeysReferenced
+ countKeys <$> cachedReferencedData
tmp_size :: Stat
tmp_size = staleSize "temporary directory size" gitAnnexTmpDir
@@ -144,46 +152,62 @@ bad_data_size = staleSize "bad keys size" gitAnnexBadDir
backend_usage :: Stat
backend_usage = stat "backend usage" $ nojson $
- calc <$> cachedKeysReferenced <*> cachedKeysPresent
+ calc
+ <$> (backendsKeys <$> cachedReferencedData)
+ <*> (backendsKeys <$> cachedPresentData)
where
- calc a b = pp "" $ reverse . sort $ map swap $ splits $ S.toList $ S.union a b
- splits :: [Key] -> [(String, Integer)]
- splits ks = M.toList $ M.fromListWith (+) $ map tcount ks
- tcount k = (keyBackendName k, 1)
- swap (a, b) = (b, a)
+ calc a b = pp "" $ reverse . sort $ map swap $ M.toList $ M.unionWith (+) a b
pp c [] = c
pp c ((n, b):xs) = "\n\t" ++ b ++ ": " ++ show n ++ pp c xs
+ swap (a, b) = (b, a)
-cachedKeysPresent :: StatState (Set Key)
-cachedKeysPresent = do
+cachedPresentData :: StatState KeyData
+cachedPresentData = do
s <- get
- case keysPresentCache s of
+ case presentData s of
Just v -> return v
Nothing -> do
- keys <- S.fromList <$> lift getKeysPresent
- put s { keysPresentCache = Just keys }
- return keys
+ v <- foldKeys <$> lift getKeysPresent
+ put s { presentData = Just v }
+ return v
-cachedKeysReferenced :: StatState (Set Key)
-cachedKeysReferenced = do
+cachedReferencedData :: StatState KeyData
+cachedReferencedData = do
s <- get
- case keysReferencedCache s of
+ case referencedData s of
Just v -> return v
Nothing -> do
- keys <- S.fromList <$> lift Command.Unused.getKeysReferenced
- put s { keysReferencedCache = Just keys }
- return keys
+ !v <- lift $ Command.Unused.withKeysReferenced
+ emptyKeyData addKey
+ put s { referencedData = Just v }
+ return v
+
+emptyKeyData :: KeyData
+emptyKeyData = KeyData 0 0 0 M.empty
-keySizeSum :: Set Key -> String
-keySizeSum s = total ++ missingnote
+foldKeys :: [Key] -> KeyData
+foldKeys = foldl' (flip addKey) emptyKeyData
+
+addKey :: Key -> KeyData -> KeyData
+addKey key (KeyData count size unknownsize backends) =
+ KeyData count' size' unknownsize' backends'
+ where
+ {- All calculations strict to avoid thunks when repeatedly
+ - applied to many keys. -}
+ !count' = count + 1
+ !backends' = M.insertWith' (+) (keyBackendName key) 1 backends
+ !size' = maybe size (+ size) ks
+ !unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks
+ ks = keySize key
+
+showSizeKeys :: KeyData -> String
+showSizeKeys d = total ++ missingnote
where
- knownsizes = mapMaybe keySize $ S.toList s
- total = roughSize storageUnits False $ sum knownsizes
- missing = S.size s - genericLength knownsizes
+ total = roughSize storageUnits False $ sizeKeys d
missingnote
- | missing == 0 = ""
+ | unknownSizeKeys d == 0 = ""
| otherwise = aside $
- "+ " ++ show missing ++
+ "+ " ++ show (unknownSizeKeys d) ++
" keys of unknown size"
staleSize :: String -> (Git.Repo -> FilePath) -> Stat
@@ -192,7 +216,7 @@ staleSize label dirspec = do
if null keys
then nostat
else stat label $ json (++ aside "clean up with git-annex unused") $
- return $ keySizeSum $ S.fromList keys
+ return $ showSizeKeys $ foldKeys keys
aside :: String -> String
aside s = " (" ++ s ++ ")"
diff --git a/Command/Unused.hs b/Command/Unused.hs
index ba14bfc4a..69b58c5e7 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -1,6 +1,6 @@
{- git-annex command
-
- - Copyright 2010-2011 Joey Hess <joey@kitenet.net>
+ - Copyright 2010-2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -171,7 +171,7 @@ excludeReferenced l = do
go s (r:rs)
| s == S.empty = return [] -- optimisation
| otherwise = do
- !s' <- withKeysReferencedInGit r s S.delete
+ s' <- withKeysReferencedInGit r s S.delete
go s' rs
{- Finds items in the first, smaller list, that are not
@@ -186,21 +186,14 @@ exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
where
remove a b = foldl (flip S.delete) b a
-{- List of keys referenced by symlinks in the git repo. -}
-getKeysReferenced :: Annex [Key]
-getKeysReferenced = do
- top <- fromRepo Git.workTree
- files <- inRepo $ LsFiles.inRepo [top]
- keypairs <- mapM Backend.lookupFile files
- return $ map fst $ catMaybes keypairs
-
{- Given an initial value, mutates it using an action for each
- key referenced by symlinks in the git repo. -}
withKeysReferenced :: v -> (Key -> v -> v) -> Annex v
-withKeysReferenced initial a = do
- top <- fromRepo Git.workTree
- go initial =<< inRepo (LsFiles.inRepo [top])
+withKeysReferenced initial a = go initial =<< files
where
+ files = do
+ top <- fromRepo Git.workTree
+ inRepo $ LsFiles.inRepo [top]
go v [] = return v
go v (f:fs) = do
x <- Backend.lookupFile f
diff --git a/debian/changelog b/debian/changelog
index 2cb5a1aea..6da54056c 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -6,6 +6,7 @@ git-annex (3.20120310) UNRELEASED; urgency=low
* unused: Reduce memory usage significantly. Still not constant
space, but now only needs to store the set of file contents that
are present in the annex in memory.
+ * status: Fixed to run in nearly constant space.
-- Joey Hess <joeyh@debian.org> Sat, 10 Mar 2012 14:03:22 -0400