From a8816efc140108cc62713cc6227db69ef96cd913 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 16 May 2011 21:18:34 -0400 Subject: status: New subcommand to show info about an annex, including its size. --- Command/Status.hs | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++ Command/Unused.hs | 18 +++---- GitAnnex.hs | 2 + debian/changelog | 6 +++ doc/git-annex.mdwn | 10 ++++ 5 files changed, 178 insertions(+), 9 deletions(-) create mode 100644 Command/Status.hs diff --git a/Command/Status.hs b/Command/Status.hs new file mode 100644 index 000000000..e8fce3bca --- /dev/null +++ b/Command/Status.hs @@ -0,0 +1,151 @@ +{- git-annex command + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Status where + +import Control.Monad.State +import Data.Maybe +import System.IO +import Data.List +import qualified Data.Map as M + +import qualified Annex +import qualified BackendClass +import qualified RemoteClass +import qualified Remote +import qualified Command.Unused +import Command +import Types +import DataUnits +import Content +import Key + +-- a named computation that produces a statistic +type Stat = (String, StatState String) + +-- cached info that multiple Stats may need +type SizeList a = ([a], Int) +data StatInfo = StatInfo + { keysPresentCache :: (Maybe (SizeList Key)) + , keysReferencedCache :: (Maybe (SizeList Key)) + } + +-- a state monad for running Stats in +type StatState = StateT StatInfo Annex + +command :: [Command] +command = [repoCommand "status" (paramNothing) seek + "shows status information about the annex"] + +seek :: [CommandSeek] +seek = [withNothing start] + +{- Order is significant. Less expensive operations, and operations + - that share data go together. + -} +faststats :: [Stat] +faststats = + [ supported_backends + , supported_remote_types + , local_annex_keys + , local_annex_size + ] +slowstats :: [Stat] +slowstats = + [ total_annex_keys + , total_annex_size + , backend_usage + ] + +start :: CommandStartNothing +start = do + fast <- Annex.getState Annex.fast + let todo = if fast then faststats else faststats ++ slowstats + evalStateT (mapM_ showStat todo) (StatInfo Nothing Nothing) + stop + +stat :: String -> StatState String -> Stat +stat desc a = (desc, a) + +showStat :: Stat -> StatState () +showStat (desc, a) = do + liftIO $ putStr $ desc ++ ": " + liftIO $ hFlush stdout + liftIO . putStrLn =<< a + + +supported_backends :: Stat +supported_backends = stat "supported backends" $ + lift (Annex.getState Annex.supportedBackends) >>= + return . unwords . (map BackendClass.name) + +supported_remote_types :: Stat +supported_remote_types = stat "supported remote types" $ + return $ unwords $ map RemoteClass.typename Remote.remoteTypes + +local_annex_size :: Stat +local_annex_size = stat "local annex size" $ + cachedKeysPresent >>= keySizeSum + +total_annex_size :: Stat +total_annex_size = stat "total annex size" $ + cachedKeysReferenced >>= keySizeSum + +local_annex_keys :: Stat +local_annex_keys = stat "local annex keys" $ + return . show . snd =<< cachedKeysPresent + +total_annex_keys :: Stat +total_annex_keys = stat "total annex keys" $ + return . show . snd =<< cachedKeysReferenced + +backend_usage :: Stat +backend_usage = stat "backend usage" $ + return . usage =<< cachedKeysReferenced + where + usage (ks, _) = pp "" $ sort $ map tflip $ splits ks + splits :: [Key] -> [(String, Integer)] + splits ks = M.toList $ M.fromListWith (+) $ map tcount ks + tcount k = (keyBackendName k, 1) + tflip (a, b) = (b, a) + pp c [] = c + pp c ((n, b):xs) = "\n\t" ++ b ++ ": " ++ show n ++ pp c xs + +cachedKeysPresent :: StatState (SizeList Key) +cachedKeysPresent = do + s <- get + case keysPresentCache s of + Just v -> return v + Nothing -> do + keys <- lift $ getKeysPresent + let v = (keys, length keys) + put s { keysPresentCache = Just v } + return v + +cachedKeysReferenced :: StatState (SizeList Key) +cachedKeysReferenced = do + s <- get + case keysReferencedCache s of + Just v -> return v + Nothing -> do + keys <- lift $ Command.Unused.getKeysReferenced + -- A given key may be referenced repeatedly. + -- nub does not seem too slow (yet).. + let uniques = nub keys + let v = (uniques, length uniques) + put s { keysReferencedCache = Just v } + return v + +keySizeSum :: SizeList Key -> StatState String +keySizeSum (keys, len) = do + let knownsize = catMaybes $ map keySize keys + let total = roughSize storageUnits False $ foldl (+) 0 knownsize + let missing = len - length knownsize + return $ total ++ + if missing > 0 + then " (but " ++ show missing ++ " keys have unknown size)" + else "" diff --git a/Command/Unused.hs b/Command/Unused.hs index a2e1c86de..1482f057e 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -144,16 +144,16 @@ unusedKeys = do if fast then do showNote "fast mode enabled; only finding stale files" - tmp <- staleKeys' gitAnnexTmpDir - bad <- staleKeys' gitAnnexBadDir + tmp <- staleKeys gitAnnexTmpDir + bad <- staleKeys gitAnnexBadDir return ([], bad, tmp) else do showNote "checking for unused data..." present <- getKeysPresent referenced <- getKeysReferenced let unused = present `exclude` referenced - staletmp <- staleKeys gitAnnexTmpDir present - stalebad <- staleKeys gitAnnexBadDir present + staletmp <- staleKeysPrune gitAnnexTmpDir present + stalebad <- staleKeysPrune gitAnnexBadDir present return (unused, stalebad, staletmp) {- Finds items in the first, smaller list, that are not @@ -182,9 +182,9 @@ getKeysReferenced = do - When a list of presently available keys is provided, stale keys - that no longer have value are deleted. -} -staleKeys :: (Git.Repo -> FilePath) -> [Key] -> Annex [Key] -staleKeys dirspec present = do - contents <- staleKeys' dirspec +staleKeysPrune :: (Git.Repo -> FilePath) -> [Key] -> Annex [Key] +staleKeysPrune dirspec present = do + contents <- staleKeys dirspec let stale = contents `exclude` present let dup = contents `exclude` stale @@ -195,8 +195,8 @@ staleKeys dirspec present = do return stale -staleKeys' :: (Git.Repo -> FilePath) -> Annex [Key] -staleKeys' dirspec = do +staleKeys :: (Git.Repo -> FilePath) -> Annex [Key] +staleKeys dirspec = do g <- Annex.gitRepo let dir = dirspec g exists <- liftIO $ doesDirectoryExist dir diff --git a/GitAnnex.hs b/GitAnnex.hs index 736b430e6..99aec187a 100644 --- a/GitAnnex.hs +++ b/GitAnnex.hs @@ -36,6 +36,7 @@ import qualified Command.Lock import qualified Command.PreCommit import qualified Command.Find import qualified Command.Whereis +import qualified Command.Status import qualified Command.Migrate import qualified Command.Uninit import qualified Command.Trust @@ -72,6 +73,7 @@ cmds = concat , Command.DropUnused.command , Command.Find.command , Command.Whereis.command + , Command.Status.command , Command.Migrate.command , Command.Map.command , Command.Upgrade.command diff --git a/debian/changelog b/debian/changelog index 5cae0a8b5..d759d3672 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +git-annex (0.20110517) UNRELEASED; urgency=low + + * status: New subcommand to show info about an annex, including its size. + + -- Joey Hess Mon, 16 May 2011 20:27:46 -0400 + git-annex (0.20110516) unstable; urgency=low * Add a few tweaks to make it easy to use the Internet Archive's variant diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 450b95a0d..e2a04d27b 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -182,6 +182,16 @@ Many git-annex commands will stage changes for later `git commit` by you. Displays a list of repositories known to contain the content of the specified file or files. +* status + + Displays some statistics and other information, including how much data + is in the annex. + + Some of the statistics can take a while to generate, and those + come last. You can ctrl-c this command once it's displayed the + information you wanted to see. Or, use --fast to only display + the first, fast(ish) statistics. + * migrate [path ...] Changes the specified annexed files to store their content in the -- cgit v1.2.3