summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2015-04-12 12:49:11 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2015-04-12 12:54:41 -0400
commitc6ba6fb9bda03fa1ec43aae54197777a04a54a62 (patch)
tree4dbdab956fb87041e9b13fea7981e01a91602811
parent5b3e97f72cf4fdaac4642b6e9fd48ff6419bda1d (diff)
info dir: Added information about repositories that contain files in the specified directory.
This is a nearly free feature; it piggybacks on the location log lookups done for the numcopies stats. So, the only extra overhead is updating the map of repository sizes. However, I had to switch to Data.Map.Strict, which needs containers 0.5. If backporting to wheezy, will probably need to revert this commit.
-rw-r--r--Command/Info.hs77
-rw-r--r--Config/NumCopies.hs11
-rw-r--r--Remote.hs2
-rw-r--r--debian/changelog2
-rw-r--r--git-annex.cabal2
5 files changed, 67 insertions, 27 deletions
diff --git a/Command/Info.hs b/Command/Info.hs
index db5953050..cf28c85b9 100644
--- a/Command/Info.hs
+++ b/Command/Info.hs
@@ -10,7 +10,7 @@
module Command.Info where
import "mtl" Control.Monad.State.Strict
-import qualified Data.Map as M
+import qualified Data.Map.Strict as M
import Text.JSON
import Data.Tuple
import Data.Ord
@@ -66,7 +66,7 @@ instance Show Variance where
data StatInfo = StatInfo
{ presentData :: Maybe KeyData
, referencedData :: Maybe KeyData
- , remoteData :: M.Map UUID KeyData
+ , repoData :: M.Map UUID KeyData
, numCopiesStats :: Maybe NumCopiesStats
}
@@ -156,9 +156,9 @@ selStats fast_stats slow_stats = do
global_fast_stats :: [Stat]
global_fast_stats =
[ repository_mode
- , remote_list Trusted
- , remote_list SemiTrusted
- , remote_list UnTrusted
+ , repo_list Trusted
+ , repo_list SemiTrusted
+ , repo_list UnTrusted
, transfer_list
, disk_size
]
@@ -184,6 +184,7 @@ dir_fast_stats =
dir_slow_stats :: [FilePath -> Stat]
dir_slow_stats =
[ const numcopies_stats
+ , const reposizes_stats
]
file_stats :: FilePath -> Key -> [Stat]
@@ -245,8 +246,8 @@ repository_mode = simpleStat "repository mode" $ lift $
)
)
-remote_list :: TrustLevel -> Stat
-remote_list level = stat n $ nojson $ lift $ do
+repo_list :: TrustLevel -> Stat
+repo_list level = stat n $ nojson $ lift $ do
us <- filter (/= NoUUID) . M.keys
<$> (M.union <$> uuidMap <*> remoteMap Remote.name)
rs <- fst <$> trustPartition level us
@@ -389,6 +390,23 @@ numcopies_stats = stat "numcopies stats" $ nojson $
. map (\(variance, count) -> show variance ++ ": " ++ show count)
. sortBy (flip (comparing snd)) . M.toList
+reposizes_stats :: Stat
+reposizes_stats = stat "repositories containing these files" $ nojson $
+ calc <$> lift uuidDescriptions <*> cachedRepoData
+ where
+ calc descm = multiLine
+ . format
+ . map (\(u, d) -> line descm u d)
+ . sortBy (flip (comparing (sizeKeys . snd))) . M.toList
+ line descm u d = (sz, fromUUID u ++ " -- " ++ desc)
+ where
+ sz = roughSize storageUnits True (sizeKeys d)
+ desc = fromMaybe "" (M.lookup u descm)
+ format l = map (\(c1, c2) -> lpad maxc1 c1 ++ ": " ++ c2 ) l
+ where
+ maxc1 = maximum (map (length . fst) l)
+ lpad n s = (replicate (n - length s) ' ') ++ s
+
cachedPresentData :: StatState KeyData
cachedPresentData = do
s <- get
@@ -402,11 +420,11 @@ cachedPresentData = do
cachedRemoteData :: UUID -> StatState KeyData
cachedRemoteData u = do
s <- get
- case M.lookup u (remoteData s) of
+ case M.lookup u (repoData s) of
Just v -> return v
Nothing -> do
v <- foldKeys <$> lift (loggedKeysFor u)
- put s { remoteData = M.insert u v (remoteData s) }
+ put s { repoData = M.insert u v (repoData s) }
return v
cachedReferencedData :: StatState KeyData
@@ -424,17 +442,21 @@ cachedReferencedData = do
cachedNumCopiesStats :: StatState (Maybe NumCopiesStats)
cachedNumCopiesStats = numCopiesStats <$> get
+-- currently only available for directory info
+cachedRepoData :: StatState (M.Map UUID KeyData)
+cachedRepoData = repoData <$> get
+
getDirStatInfo :: FilePath -> Annex StatInfo
getDirStatInfo dir = do
fast <- Annex.getState Annex.fast
matcher <- Limit.getMatcher
- (presentdata, referenceddata, numcopiesstats) <-
+ (presentdata, referenceddata, numcopiesstats, repodata) <-
Command.Unused.withKeysFilesReferencedIn dir initial
(update matcher fast)
- return $ StatInfo (Just presentdata) (Just referenceddata) M.empty (Just numcopiesstats)
+ return $ StatInfo (Just presentdata) (Just referenceddata) repodata (Just numcopiesstats)
where
- initial = (emptyKeyData, emptyKeyData, emptyNumCopiesStats)
- update matcher fast key file vs@(presentdata, referenceddata, numcopiesstats) =
+ initial = (emptyKeyData, emptyKeyData, emptyNumCopiesStats, M.empty)
+ update matcher fast key file vs@(presentdata, referenceddata, numcopiesstats, repodata) =
ifM (matcher $ MatchingFile $ FileInfo file file)
( do
!presentdata' <- ifM (inAnnex key)
@@ -442,10 +464,13 @@ getDirStatInfo dir = do
, return presentdata
)
let !referenceddata' = addKey key referenceddata
- !numcopiesstats' <- if fast
- then return numcopiesstats
- else updateNumCopiesStats key file numcopiesstats
- return $! (presentdata', referenceddata', numcopiesstats')
+ (!numcopiesstats', !repodata') <- if fast
+ then return (numcopiesstats, repodata)
+ else do
+ locs <- Remote.keyLocations key
+ nc <- updateNumCopiesStats file numcopiesstats locs
+ return (nc, updateRepoData key locs repodata)
+ return $! (presentdata', referenceddata', numcopiesstats', repodata')
, return vs
)
@@ -465,15 +490,23 @@ addKey key (KeyData count size unknownsize backends) =
{- All calculations strict to avoid thunks when repeatedly
- applied to many keys. -}
!count' = count + 1
- !backends' = M.insertWith' (+) (keyBackendName key) 1 backends
+ !backends' = M.insertWith (+) (keyBackendName key) 1 backends
!size' = maybe size (+ size) ks
!unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks
ks = keySize key
-updateNumCopiesStats :: Key -> FilePath -> NumCopiesStats -> Annex NumCopiesStats
-updateNumCopiesStats key file (NumCopiesStats m) = do
- !variance <- Variance <$> numCopiesCheck file key (-)
- let !m' = M.insertWith' (+) variance 1 m
+updateRepoData :: Key -> [UUID] -> M.Map UUID KeyData -> M.Map UUID KeyData
+updateRepoData key locs m = m'
+ where
+ !m' = M.unionWith (\_old new -> new) m $
+ M.fromList $ zip locs (map update locs)
+ update loc = addKey key (fromMaybe emptyKeyData $ M.lookup loc m)
+
+updateNumCopiesStats :: FilePath -> NumCopiesStats -> [UUID] -> Annex NumCopiesStats
+updateNumCopiesStats file (NumCopiesStats m) locs = do
+ have <- trustExclude UnTrusted locs
+ !variance <- Variance <$> numCopiesCheck' file (-) have
+ let !m' = M.insertWith (+) variance 1 m
let !ret = NumCopiesStats m'
return ret
diff --git a/Config/NumCopies.hs b/Config/NumCopies.hs
index b25e0818d..50dcdf684 100644
--- a/Config/NumCopies.hs
+++ b/Config/NumCopies.hs
@@ -11,9 +11,10 @@ module Config.NumCopies (
getFileNumCopies,
getGlobalFileNumCopies,
getNumCopies,
- numCopiesCheck,
deprecatedNumCopies,
- defaultNumCopies
+ defaultNumCopies,
+ numCopiesCheck,
+ numCopiesCheck',
) where
import Common.Annex
@@ -75,6 +76,10 @@ getFileNumCopies' file = maybe getGlobalNumCopies (return . Just) =<< getattr
- belived to exist, and the configured value. -}
numCopiesCheck :: FilePath -> Key -> (Int -> Int -> v) -> Annex v
numCopiesCheck file key vs = do
- NumCopies needed <- getFileNumCopies file
have <- trustExclude UnTrusted =<< Remote.keyLocations key
+ numCopiesCheck' file vs have
+
+numCopiesCheck' :: FilePath -> (Int -> Int -> v) -> [UUID] -> Annex v
+numCopiesCheck' file vs have = do
+ NumCopies needed <- getFileNumCopies file
return $ length have `vs` needed
diff --git a/Remote.hs b/Remote.hs
index 771c63472..8a03f757d 100644
--- a/Remote.hs
+++ b/Remote.hs
@@ -83,7 +83,7 @@ remoteMap' mkv mkk = M.fromList . mapMaybe mk <$> remoteList
Nothing -> Nothing
Just k -> Just (k, mkv r)
-{- Map of UUIDs of remotes and their descriptions.
+{- Map of UUIDs of repositories and their descriptions.
- The names of Remotes are added to suppliment any description that has
- been set for a repository. -}
uuidDescriptions :: Annex (M.Map UUID String)
diff --git a/debian/changelog b/debian/changelog
index f4afbc461..8a8c7be35 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -4,6 +4,8 @@ git-annex (5.20150410) UNRELEASED; urgency=medium
activity from other uuids.
* Union merge could fall over if there was a file in the repository
with the same name as a git ref. Now fixed.
+ * info dir: Added information about repositories that
+ contain files in the specified directory.
-- Joey Hess <id@joeyh.name> Thu, 09 Apr 2015 20:59:43 -0400
diff --git a/git-annex.cabal b/git-annex.cabal
index 093c45d0e..b5ddb7d79 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -104,7 +104,7 @@ Flag network-uri
Executable git-annex
Main-Is: git-annex.hs
Build-Depends: MissingH, hslogger, directory, filepath,
- containers, utf8-string, mtl (>= 2),
+ containers (>= 0.5.0.0), utf8-string, mtl (>= 2),
bytestring, old-locale, time, dataenc, SHA, process, json,
base (>= 4.5 && < 4.9), monad-control, exceptions (>= 0.6), transformers,
IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance,