summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-01-22 16:35:32 -0400
committerGravatar Joey Hess <joey@kitenet.net>2014-01-22 16:35:32 -0400
commit180fbceee03badc450c84c28ef8219fa68d80bd6 (patch)
tree1739e92f46067ae0be84af6789b3d3b9adb09ee1
parent9a5de318d15f0234080a6f0bd802fe073cf57334 (diff)
add "unused" preferred content expression
With a really nice optimisation that keeps it from having any overhead in normal operation! This commit was sponsored by Ulises Vitulli.
-rw-r--r--Annex.hs3
-rw-r--r--Annex/FileMatcher.hs1
-rw-r--r--Limit.hs10
-rw-r--r--Logs/Unused.hs18
-rw-r--r--Seek.hs4
-rw-r--r--debian/changelog3
-rw-r--r--doc/todo/Limit_file_revision_history.mdwn15
7 files changed, 41 insertions, 13 deletions
diff --git a/Annex.hs b/Annex.hs
index e057bb9d2..9beded53f 100644
--- a/Annex.hs
+++ b/Annex.hs
@@ -46,6 +46,7 @@ import Git.CheckAttr
import Git.CheckIgnore
import Git.SharedRepository
import qualified Git.Queue
+import Types.Key
import Types.Backend
import Types.GitConfig
import qualified Types.Remote
@@ -112,6 +113,7 @@ data AnnexState = AnnexState
, inodeschanged :: Maybe Bool
, useragent :: Maybe String
, errcounter :: Integer
+ , unusedkeys :: Maybe (S.Set Key)
}
newState :: GitConfig -> Git.Repo -> AnnexState
@@ -148,6 +150,7 @@ newState c r = AnnexState
, inodeschanged = Nothing
, useragent = Nothing
, errcounter = 0
+ , unusedkeys = Nothing
}
{- Makes an Annex state object for the specified git repo.
diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs
index b26a0d7ac..c144920cf 100644
--- a/Annex/FileMatcher.hs
+++ b/Annex/FileMatcher.hs
@@ -65,6 +65,7 @@ parseToken checkpresent checkpreferreddir groupmap t
| t `elem` tokens = Right $ token t
| t == "present" = use checkpresent
| t == "inpreferreddir" = use checkpreferreddir
+ | t == "unused" = Right (Operation limitUnused)
| otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
M.fromList
[ ("include", limitInclude)
diff --git a/Limit.hs b/Limit.hs
index 6ce444325..8250544a4 100644
--- a/Limit.hs
+++ b/Limit.hs
@@ -30,6 +30,7 @@ import Types.Group
import Types.FileMatcher
import Types.Limit
import Logs.Group
+import Logs.Unused
import Utility.HumanTime
import Utility.DataUnits
@@ -199,6 +200,15 @@ limitLackingCopies approx want = case readish want of
return $ numcopies - length us >= needed
approxNumCopies = fromMaybe defaultNumCopies <$> getGlobalNumCopies
+{- Match keys that are unused.
+ -
+ - This has a nice optimisation: When a file exists,
+ - its key is obviously not unused.
+ -}
+limitUnused :: MatchFiles
+limitUnused _ (MatchingFile _) = return False
+limitUnused _ (MatchingKey k) = S.member k <$> unusedKeys
+
{- Adds a limit to skip files not believed to be present in all
- repositories in the specified group. -}
addInAllGroup :: String -> Annex ()
diff --git a/Logs/Unused.hs b/Logs/Unused.hs
index daeeb93ab..74f46b85e 100644
--- a/Logs/Unused.hs
+++ b/Logs/Unused.hs
@@ -21,14 +21,17 @@ module Logs.Unused (
readUnusedLog,
readUnusedMap,
unusedKeys,
+ unusedKeys'
) where
import qualified Data.Map as M
+import qualified Data.Set as S
import Data.Time.Clock.POSIX
import Data.Time
import System.Locale
import Common.Annex
+import qualified Annex
import Types.Key
import Utility.Tmp
@@ -84,5 +87,16 @@ readUnusedLog prefix = do
readUnusedMap :: FilePath -> Annex UnusedMap
readUnusedMap = log2map <$$> readUnusedLog
-unusedKeys :: Annex [Key]
-unusedKeys = M.keys <$> readUnusedLog ""
+{- Set of unused keys. This is cached for speed. -}
+unusedKeys :: Annex (S.Set Key)
+unusedKeys = maybe (setUnusedKeys =<< unusedKeys') return
+ =<< Annex.getState Annex.unusedkeys
+
+unusedKeys' :: Annex [Key]
+unusedKeys' = M.keys <$> readUnusedLog ""
+
+setUnusedKeys :: [Key] -> Annex (S.Set Key)
+setUnusedKeys ks = do
+ let v = S.fromList ks
+ Annex.changeState $ \s -> s { Annex.unusedkeys = Just v }
+ return v
diff --git a/Seek.hs b/Seek.hs
index 57bedfc84..3242dfb33 100644
--- a/Seek.hs
+++ b/Seek.hs
@@ -133,9 +133,9 @@ withKeyOptions keyop fallbackop params = do
auto <- Annex.getState Annex.auto
case (allkeys || bare , unused, auto ) of
(True , False , False) -> go loggedKeys
- (False , True , False) -> go unusedKeys
+ (False , True , False) -> go unusedKeys'
(True , True , _ )
- | bare && not allkeys -> go unusedKeys
+ | bare && not allkeys -> go unusedKeys'
| otherwise -> error "Cannot use --all with --unused."
(False , False , _ ) -> fallbackop params
(_ , _ , True )
diff --git a/debian/changelog b/debian/changelog
index aba8a5d3f..7c2a0d3b3 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -14,7 +14,8 @@ git-annex (5.20140118) UNRELEASED; urgency=medium
command is used to set the global number of copies, any annex.numcopies
git configs will be ignored.
* assistant: Make the prefs page set the global numcopies.
- * Add lackingcopies and approxlackingcopies preferred content expressions.
+ * Add lackingcopies, approxlackingcopies, and unused to
+ preferred content expressions.
* Client, transfer, incremental backup, and archive repositories
now want to get content that does not yet have enough copies.
* repair: Check git version at run time.
diff --git a/doc/todo/Limit_file_revision_history.mdwn b/doc/todo/Limit_file_revision_history.mdwn
index 9cdfe5e9b..c00b555b1 100644
--- a/doc/todo/Limit_file_revision_history.mdwn
+++ b/doc/todo/Limit_file_revision_history.mdwn
@@ -39,17 +39,16 @@ Finally, how to specify a feature request for git-annex?
> So, let's spec out a design.
>
> * Add preferred content terminal to configure whether a repository wants
-> to hang on to unused content.
-> Something like "unused=true" I suppose, because not having a parameter
-> would complicate preferred content parsing, and I cannot think
-> of a useful parameter. (It cannot be a timestamp, because there's
-> no way repos can agree on about when a key became unused.)
+> to hang on to unused content. Simply `unused`.
+> (It cannot include a timestamp, because there's
+> no way repos can agree on about when a key became unused.) **done**
> * In order to quickly match that terminal, the Annex monad will need
-> to keep a Set of unused Keys. This should only be loaded on demand.
+> to keep a Set of unused Keys. This should only be loaded on demand.
+> **done**
> NB: There is some potential for a great many unused Keys to cause
> memory usage to balloon.
> * Client repositories will end their preferred content with
-> `and unused=false`. Transfer repositories too, because typically
+> `and (not unused)`. Transfer repositories too, because typically
> only client repos connect to them, and so otherwise unused files
> would build up there. Backup repos would want unused files. I
> think that archive repos would too.
@@ -90,7 +89,7 @@ Finally, how to specify a feature request for git-annex?
> client directly edits it, or deletes it, it loses the old version,
> but the other client will still be storing that old version.
>
-> ## Stability analysis for unused= in preferred content expressions
+> ## Stability analysis for unused in preferred content expressions
>
> This is tricky, because two repos that are otherwise entirely
> in sync may have differing opinons about whether a key is unused,