aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CmdLine/Usage.hs2
-rw-r--r--Command/Unused.hs56
-rw-r--r--Types/RefSpec.hs44
-rw-r--r--debian/changelog2
-rw-r--r--doc/git-annex-unused.mdwn32
-rw-r--r--doc/todo/unused_by_refspec.mdwn5
6 files changed, 116 insertions, 25 deletions
diff --git a/CmdLine/Usage.hs b/CmdLine/Usage.hs
index 82619a304..ad1d4e583 100644
--- a/CmdLine/Usage.hs
+++ b/CmdLine/Usage.hs
@@ -95,6 +95,8 @@ paramFile :: String
paramFile = "FILE"
paramRef :: String
paramRef = "REF"
+paramRefSpec :: String
+paramRefSpec = "REFSPEC"
paramGroup :: String
paramGroup = "GROUP"
paramExpression :: String
diff --git a/Command/Unused.hs b/Command/Unused.hs
index 4bbde4da4..a5698c833 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -31,34 +31,41 @@ import qualified Remote
import qualified Annex.Branch
import Annex.CatFile
import Types.Key
+import Types.RefSpec
import Git.FilePath
import Logs.View (is_branchView)
import Utility.Bloom
cmd :: [Command]
-cmd = [withOptions [unusedFromOption] $ command "unused" paramNothing seek
- SectionMaintenance "look for unused file content"]
+cmd = [withOptions [unusedFromOption, refSpecOption] $
+ command "unused" paramNothing seek
+ SectionMaintenance "look for unused file content"]
unusedFromOption :: Option
unusedFromOption = fieldOption ['f'] "from" paramRemote "remote to check for unused content"
+refSpecOption :: Option
+refSpecOption = fieldOption [] "used-refspec" paramRefSpec "refs to consider used (default: all refs)"
+
seek :: CommandSeek
seek = withNothing start
{- Finds unused content in the annex. -}
start :: CommandStart
start = do
- from <- Annex.getField $ optionName unusedFromOption
+ !refspec <- maybe allRefSpec (either error id . parseRefSpec)
+ <$> Annex.getField (optionName refSpecOption)
+ from <- Annex.getField (optionName unusedFromOption)
let (name, action) = case from of
- Nothing -> (".", checkUnused)
- Just "." -> (".", checkUnused)
- Just "here" -> (".", checkUnused)
- Just n -> (n, checkRemoteUnused n)
+ Nothing -> (".", checkUnused refspec)
+ Just "." -> (".", checkUnused refspec)
+ Just "here" -> (".", checkUnused refspec)
+ Just n -> (n, checkRemoteUnused n refspec)
showStart "unused" name
next action
-checkUnused :: CommandPerform
-checkUnused = chain 0
+checkUnused :: RefSpec -> CommandPerform
+checkUnused refspec = chain 0
[ check "" unusedMsg $ findunused =<< Annex.getState Annex.fast
, check "bad" staleBadMsg $ staleKeysPrune gitAnnexBadDir False
, check "tmp" staleTmpMsg $ staleKeysPrune gitAnnexTmpObjectDir True
@@ -71,20 +78,20 @@ checkUnused = chain 0
showAction "checking for unused data"
-- InAnnex, not InRepository because if a direct mode
-- file exists, it is obviously not unused.
- excludeReferenced =<< getKeysPresent InAnnex
+ excludeReferenced refspec =<< getKeysPresent InAnnex
chain _ [] = next $ return True
chain v (a:as) = do
v' <- a v
chain v' as
-checkRemoteUnused :: String -> CommandPerform
-checkRemoteUnused name = go =<< fromJust <$> Remote.byNameWithUUID (Just name)
+checkRemoteUnused :: String -> RefSpec -> CommandPerform
+checkRemoteUnused name refspec = go =<< fromJust <$> Remote.byNameWithUUID (Just name)
where
go r = do
showAction "checking for unused data"
_ <- check "" (remoteUnusedMsg r) (remoteunused r) 0
next $ return True
- remoteunused r = excludeReferenced <=< loggedKeysFor $ Remote.uuid r
+ remoteunused r = excludeReferenced refspec <=< loggedKeysFor $ Remote.uuid r
check :: FilePath -> ([(Int, Key)] -> String) -> Annex [Key] -> Int -> Annex Int
check file msg a c = do
@@ -145,7 +152,7 @@ dropMsg' s = "\nTo remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER\
- * Build a bloom filter of all keys referenced by symlinks. This
- is the fastest one to build and will filter out most keys.
- * If keys remain, build a second bloom filter of keys referenced by
- - all branches.
+ - branches maching the RefSpec.
- * The list is streamed through these bloom filters lazily, so both will
- exist at the same time. This means that twice the memory is used,
- but they're relatively small, so the added complexity of using a
@@ -157,13 +164,13 @@ dropMsg' s = "\nTo remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER\
- Short-circuiting if the first filter filters all the keys handles the
- other common case.
-}
-excludeReferenced :: [Key] -> Annex [Key]
-excludeReferenced ks = runfilter firstlevel ks >>= runfilter secondlevel
+excludeReferenced :: RefSpec -> [Key] -> Annex [Key]
+excludeReferenced refspec ks = runfilter firstlevel ks >>= runfilter secondlevel
where
runfilter _ [] = return [] -- optimisation
runfilter a l = bloomFilter show l <$> genBloomFilter show a
firstlevel = withKeysReferencedM
- secondlevel = withKeysReferencedInGit
+ secondlevel = withKeysReferencedInGit refspec
{- Finds items in the first, smaller list, that are not
- present in the second, larger list.
@@ -258,14 +265,15 @@ withKeysReferenced' mdir initial a = do
!v' <- a k f v
go v' fs
-withKeysReferencedInGit :: (Key -> Annex ()) -> Annex ()
-withKeysReferencedInGit a = do
+withKeysReferencedInGit :: RefSpec -> (Key -> Annex ()) -> Annex ()
+withKeysReferencedInGit refspec a = do
current <- inRepo Git.Branch.currentUnsafe
shaHead <- maybe (return Nothing) (inRepo . Git.Ref.sha) current
- showref >>= mapM_ (withKeysReferencedInGitRef a) .
- relevantrefs (shaHead, current)
+ usedrefs <- applyRefSpec refspec . relevantrefs (shaHead, current)
+ <$> inRepo (Git.Command.pipeReadStrict [Param "show-ref"])
+ forM_ usedrefs $
+ withKeysReferencedInGitRef a
where
- showref = inRepo $ Git.Command.pipeReadStrict [Param "show-ref"]
relevantrefs headRef = addHead headRef .
filter ourbranches .
map (separate (== ' ')) .
@@ -293,8 +301,8 @@ withKeysReferencedInGitRef a ref = do
showAction $ "checking " ++ Git.Ref.describe ref
bare <- isBareRepo
(ts,clean) <- inRepo $ if bare
- then DiffTree.diffIndex ref
- else DiffTree.diffWorkTree ref
+ then DiffTree.diffIndex ref
+ else DiffTree.diffWorkTree ref
let lookAtWorkingTree = not bare && ref == Git.Ref.headRef
forM_ ts $ tKey lookAtWorkingTree >=> maybe noop a
liftIO $ void clean
diff --git a/Types/RefSpec.hs b/Types/RefSpec.hs
new file mode 100644
index 000000000..42f4c6226
--- /dev/null
+++ b/Types/RefSpec.hs
@@ -0,0 +1,44 @@
+{- This is not the same as git's fetch/push refspecs.
+ -
+ - Copyright 2015 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Types.RefSpec where
+
+import Common
+import Utility.Glob
+import Git.Types
+
+import Data.Either
+
+type RefSpec = [RefSpecPart]
+
+data RefSpecPart = AddRef Ref | AddMatching Glob | RemoveMatching Glob
+
+allRefSpec :: RefSpec
+allRefSpec = [AddMatching $ compileGlob "*" CaseSensative]
+
+parseRefSpec :: String -> Either String RefSpec
+parseRefSpec v = case partitionEithers (map mk $ split ":" v) of
+ ([],refspec) -> Right refspec
+ (e:_,_) -> Left e
+ where
+ mk ('+':s)
+ | any (`elem` s) "*?" =
+ Right $ AddMatching $ compileGlob s CaseSensative
+ | otherwise = Right $ AddRef $ Ref s
+ mk ('-':s) = Right $ RemoveMatching $ compileGlob s CaseSensative
+ mk s = Left $ "bad refspec item \"" ++ s ++ "\" (expected + or - prefix)"
+
+applyRefSpec :: RefSpec -> [Ref] -> [Ref]
+applyRefSpec refspec rs = go [] refspec
+ where
+ go c [] = reverse c
+ go c (AddRef r : rest) = go (r:c) rest
+ go c (AddMatching g : rest) =
+ let add = filter (matchGlob g . fromRef) rs
+ in go (add ++ c) rest
+ go c (RemoveMatching g : rest) =
+ go (filter (not . matchGlob g . fromRef) c) rest
diff --git a/debian/changelog b/debian/changelog
index 6b5c8dff9..cdb371b73 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -18,6 +18,8 @@ git-annex (5.20150508.2) UNRELEASED; urgency=medium
running at once.
* Stale transfer lock and info files will be cleaned up automatically
when get/unused/info commands are run.
+ * unused: Add --used option, which can specify a set of refs to consider
+ used, rather than the default of considering all refs used.
-- Joey Hess <id@joeyh.name> Mon, 11 May 2015 12:45:06 -0400
diff --git a/doc/git-annex-unused.mdwn b/doc/git-annex-unused.mdwn
index d59ef2abc..fbb371995 100644
--- a/doc/git-annex-unused.mdwn
+++ b/doc/git-annex-unused.mdwn
@@ -26,7 +26,37 @@ For example, to move all unused data to origin:
* `--from=remote`
- Check for unused data on a remote.
+ Check for unused data that is located on a remote.
+
+* `--used-refspec=+ref:-ref`
+
+ By default, any data that the work tree uses, or that any refs in the git
+ repository point to is considered to be used. If you only want to use
+ some refs, you can use this option to specify the ones to use. Data that
+ is not in the specified refs (and not used by the work tree) will then be
+ considered unused.
+
+# REFSPEC
+
+The refspec format for --used-refspec is a colon-separated list of
+additions and removals of refs. For example:
+
+ +refs/heads/*:+HEAD^:+refs/tags/*:-refs/tags/old-tag
+
+This adds all refs/heads/ refs, as well as the previous version
+of HEAD. It also adds all tags, except for old-tag.
+
+This refspec is processed by starting with an empty set of refs,
+and walking the list in order from left to right.
+
+* Each + using a glob is matched against all relevant refs
+ (a subset of `git show-ref`) and all matching refs are added
+ to the set.
+ For example, "+refs/remotes/*" adds all remote refs.
+* Each + without a glob adds the literal value to the set.
+ For example, "+HEAD^" adds "HEAD^".
+* Each - is matched against the set of refs accumulated so far.
+ Any matching refs are removed from the set.
# SEE ALSO
diff --git a/doc/todo/unused_by_refspec.mdwn b/doc/todo/unused_by_refspec.mdwn
index 781425264..ea84599bb 100644
--- a/doc/todo/unused_by_refspec.mdwn
+++ b/doc/todo/unused_by_refspec.mdwn
@@ -33,4 +33,9 @@ refspec in order.
the SHAs that the refs point to, so -refs/heads/master does not remove
+HEAD).
+Hmm, unused currently does a separate pass to find files used in the work
+tree. I think it's best to keep that as-is.
+
--[[Joey]]
+
+> [[done]] --[[Joey]]