summaryrefslogtreecommitdiff
path: root/Command
diff options
context:
space:
mode:
Diffstat (limited to 'Command')
-rw-r--r--Command/Status.hs19
-rw-r--r--Command/Unused.hs24
2 files changed, 36 insertions, 7 deletions
diff --git a/Command/Status.hs b/Command/Status.hs
index 0b1741dc0..eadb4f163 100644
--- a/Command/Status.hs
+++ b/Command/Status.hs
@@ -76,6 +76,7 @@ slow_stats =
, local_annex_size
, known_annex_keys
, known_annex_size
+ , bloom_info
, backend_usage
]
@@ -127,7 +128,7 @@ remote_list level desc = stat n $ nojson $ lift $ do
return $ if null s then "0" else show (length rs) ++ "\n" ++ beginning s
where
n = desc ++ " repositories"
-
+
local_annex_size :: Stat
local_annex_size = stat "local annex size" $ json id $
showSizeKeys <$> cachedPresentData
@@ -136,6 +137,22 @@ local_annex_keys :: Stat
local_annex_keys = stat "local annex keys" $ json show $
countKeys <$> cachedPresentData
+bloom_info :: Stat
+bloom_info = stat "bloom filter size" $ json id $ do
+ localkeys <- countKeys <$> cachedPresentData
+ capacity <- fromIntegral <$> lift Command.Unused.bloomCapacity
+ let note = aside $
+ if localkeys >= capacity
+ then "appears too small for this repository; adjust annex.bloomcapacity"
+ else "has room for " ++ show (capacity - localkeys) ++ " more local annex keys"
+
+ -- Two bloom filters are used at the same time, so double the size
+ -- of one.
+ size <- roughSize memoryUnits True . (* 2) . fromIntegral . fst <$>
+ lift Command.Unused.bloomBitsHashes
+
+ return $ size ++ note
+
known_annex_size :: Stat
known_annex_size = stat "known annex size" $ json id $
showSizeKeys <$> cachedReferencedData
diff --git a/Command/Unused.hs b/Command/Unused.hs
index 028e20445..b878ab265 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -29,6 +29,7 @@ import qualified Git.Command
import qualified Git.Ref
import qualified Git.LsFiles as LsFiles
import qualified Git.LsTree as LsTree
+import qualified Git.Config
import qualified Backend
import qualified Remote
import qualified Annex.Branch
@@ -182,6 +183,22 @@ exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
where
remove a b = foldl (flip S.delete) b a
+{- A bloom filter capable of holding half a million keys with a
+ - false positive rate of 1 in 1000 uses around 8 mb of memory,
+ - so will easily fit on even my lowest memory systems.
+ -}
+bloomCapacity :: Annex Int
+bloomCapacity = fromMaybe 500000 . readish
+ <$> fromRepo (Git.Config.get "annex.bloomcapacity" "")
+bloomAccuracy :: Annex Int
+bloomAccuracy = fromMaybe 1000 . readish
+ <$> fromRepo (Git.Config.get "annex.bloomaccuracy" "")
+bloomBitsHashes :: Annex (Int, Int)
+bloomBitsHashes = do
+ capacity <- bloomCapacity
+ accuracy <- bloomAccuracy
+ return $ suggestSizing capacity (1/ fromIntegral accuracy)
+
{- Creates a bloom filter, and runs an action, such as withKeysReferenced,
- to populate it.
-
@@ -193,12 +210,7 @@ exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
-}
genBloomFilter :: Hashable t => (v -> t) -> ((v -> Annex ()) -> Annex b) -> Annex (Bloom t)
genBloomFilter convert populate = do
- -- A bloom filter capable of holding half a million keys with a
- -- false positive rate of 0.1% uses around 8 mb of memory.
- -- TODO: make this configurable, for the really large repos,
- -- or really low false positive rates.
- let (numbits, numhashes) = suggestSizing 500000 0.001
-
+ (numbits, numhashes) <- bloomBitsHashes
bloom <- lift $ newMB (cheapHashes numhashes) numbits
_ <- populate $ \v -> lift $ insertMB bloom (convert v)
lift $ unsafeFreezeMB bloom