diff options
-rw-r--r-- | CmdLine/Usage.hs | 2 | ||||
-rw-r--r-- | Command/Unused.hs | 56 | ||||
-rw-r--r-- | Types/RefSpec.hs | 44 | ||||
-rw-r--r-- | debian/changelog | 2 | ||||
-rw-r--r-- | doc/git-annex-unused.mdwn | 32 | ||||
-rw-r--r-- | doc/todo/unused_by_refspec.mdwn | 5 |
6 files changed, 116 insertions, 25 deletions
diff --git a/CmdLine/Usage.hs b/CmdLine/Usage.hs index 82619a304..ad1d4e583 100644 --- a/CmdLine/Usage.hs +++ b/CmdLine/Usage.hs @@ -95,6 +95,8 @@ paramFile :: String paramFile = "FILE" paramRef :: String paramRef = "REF" +paramRefSpec :: String +paramRefSpec = "REFSPEC" paramGroup :: String paramGroup = "GROUP" paramExpression :: String diff --git a/Command/Unused.hs b/Command/Unused.hs index 4bbde4da4..a5698c833 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -31,34 +31,41 @@ import qualified Remote import qualified Annex.Branch import Annex.CatFile import Types.Key +import Types.RefSpec import Git.FilePath import Logs.View (is_branchView) import Utility.Bloom cmd :: [Command] -cmd = [withOptions [unusedFromOption] $ command "unused" paramNothing seek - SectionMaintenance "look for unused file content"] +cmd = [withOptions [unusedFromOption, refSpecOption] $ + command "unused" paramNothing seek + SectionMaintenance "look for unused file content"] unusedFromOption :: Option unusedFromOption = fieldOption ['f'] "from" paramRemote "remote to check for unused content" +refSpecOption :: Option +refSpecOption = fieldOption [] "used-refspec" paramRefSpec "refs to consider used (default: all refs)" + seek :: CommandSeek seek = withNothing start {- Finds unused content in the annex. -} start :: CommandStart start = do - from <- Annex.getField $ optionName unusedFromOption + !refspec <- maybe allRefSpec (either error id . parseRefSpec) + <$> Annex.getField (optionName refSpecOption) + from <- Annex.getField (optionName unusedFromOption) let (name, action) = case from of - Nothing -> (".", checkUnused) - Just "." -> (".", checkUnused) - Just "here" -> (".", checkUnused) - Just n -> (n, checkRemoteUnused n) + Nothing -> (".", checkUnused refspec) + Just "." -> (".", checkUnused refspec) + Just "here" -> (".", checkUnused refspec) + Just n -> (n, checkRemoteUnused n refspec) showStart "unused" name next action -checkUnused :: CommandPerform -checkUnused = chain 0 +checkUnused :: RefSpec -> CommandPerform +checkUnused refspec = chain 0 [ check "" unusedMsg $ findunused =<< Annex.getState Annex.fast , check "bad" staleBadMsg $ staleKeysPrune gitAnnexBadDir False , check "tmp" staleTmpMsg $ staleKeysPrune gitAnnexTmpObjectDir True @@ -71,20 +78,20 @@ checkUnused = chain 0 showAction "checking for unused data" -- InAnnex, not InRepository because if a direct mode -- file exists, it is obviously not unused. - excludeReferenced =<< getKeysPresent InAnnex + excludeReferenced refspec =<< getKeysPresent InAnnex chain _ [] = next $ return True chain v (a:as) = do v' <- a v chain v' as -checkRemoteUnused :: String -> CommandPerform -checkRemoteUnused name = go =<< fromJust <$> Remote.byNameWithUUID (Just name) +checkRemoteUnused :: String -> RefSpec -> CommandPerform +checkRemoteUnused name refspec = go =<< fromJust <$> Remote.byNameWithUUID (Just name) where go r = do showAction "checking for unused data" _ <- check "" (remoteUnusedMsg r) (remoteunused r) 0 next $ return True - remoteunused r = excludeReferenced <=< loggedKeysFor $ Remote.uuid r + remoteunused r = excludeReferenced refspec <=< loggedKeysFor $ Remote.uuid r check :: FilePath -> ([(Int, Key)] -> String) -> Annex [Key] -> Int -> Annex Int check file msg a c = do @@ -145,7 +152,7 @@ dropMsg' s = "\nTo remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER\ - * Build a bloom filter of all keys referenced by symlinks. This - is the fastest one to build and will filter out most keys. - * If keys remain, build a second bloom filter of keys referenced by - - all branches. + - branches maching the RefSpec. - * The list is streamed through these bloom filters lazily, so both will - exist at the same time. This means that twice the memory is used, - but they're relatively small, so the added complexity of using a @@ -157,13 +164,13 @@ dropMsg' s = "\nTo remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER\ - Short-circuiting if the first filter filters all the keys handles the - other common case. -} -excludeReferenced :: [Key] -> Annex [Key] -excludeReferenced ks = runfilter firstlevel ks >>= runfilter secondlevel +excludeReferenced :: RefSpec -> [Key] -> Annex [Key] +excludeReferenced refspec ks = runfilter firstlevel ks >>= runfilter secondlevel where runfilter _ [] = return [] -- optimisation runfilter a l = bloomFilter show l <$> genBloomFilter show a firstlevel = withKeysReferencedM - secondlevel = withKeysReferencedInGit + secondlevel = withKeysReferencedInGit refspec {- Finds items in the first, smaller list, that are not - present in the second, larger list. @@ -258,14 +265,15 @@ withKeysReferenced' mdir initial a = do !v' <- a k f v go v' fs -withKeysReferencedInGit :: (Key -> Annex ()) -> Annex () -withKeysReferencedInGit a = do +withKeysReferencedInGit :: RefSpec -> (Key -> Annex ()) -> Annex () +withKeysReferencedInGit refspec a = do current <- inRepo Git.Branch.currentUnsafe shaHead <- maybe (return Nothing) (inRepo . Git.Ref.sha) current - showref >>= mapM_ (withKeysReferencedInGitRef a) . - relevantrefs (shaHead, current) + usedrefs <- applyRefSpec refspec . relevantrefs (shaHead, current) + <$> inRepo (Git.Command.pipeReadStrict [Param "show-ref"]) + forM_ usedrefs $ + withKeysReferencedInGitRef a where - showref = inRepo $ Git.Command.pipeReadStrict [Param "show-ref"] relevantrefs headRef = addHead headRef . filter ourbranches . map (separate (== ' ')) . @@ -293,8 +301,8 @@ withKeysReferencedInGitRef a ref = do showAction $ "checking " ++ Git.Ref.describe ref bare <- isBareRepo (ts,clean) <- inRepo $ if bare - then DiffTree.diffIndex ref - else DiffTree.diffWorkTree ref + then DiffTree.diffIndex ref + else DiffTree.diffWorkTree ref let lookAtWorkingTree = not bare && ref == Git.Ref.headRef forM_ ts $ tKey lookAtWorkingTree >=> maybe noop a liftIO $ void clean diff --git a/Types/RefSpec.hs b/Types/RefSpec.hs new file mode 100644 index 000000000..42f4c6226 --- /dev/null +++ b/Types/RefSpec.hs @@ -0,0 +1,44 @@ +{- This is not the same as git's fetch/push refspecs. + - + - Copyright 2015 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Types.RefSpec where + +import Common +import Utility.Glob +import Git.Types + +import Data.Either + +type RefSpec = [RefSpecPart] + +data RefSpecPart = AddRef Ref | AddMatching Glob | RemoveMatching Glob + +allRefSpec :: RefSpec +allRefSpec = [AddMatching $ compileGlob "*" CaseSensative] + +parseRefSpec :: String -> Either String RefSpec +parseRefSpec v = case partitionEithers (map mk $ split ":" v) of + ([],refspec) -> Right refspec + (e:_,_) -> Left e + where + mk ('+':s) + | any (`elem` s) "*?" = + Right $ AddMatching $ compileGlob s CaseSensative + | otherwise = Right $ AddRef $ Ref s + mk ('-':s) = Right $ RemoveMatching $ compileGlob s CaseSensative + mk s = Left $ "bad refspec item \"" ++ s ++ "\" (expected + or - prefix)" + +applyRefSpec :: RefSpec -> [Ref] -> [Ref] +applyRefSpec refspec rs = go [] refspec + where + go c [] = reverse c + go c (AddRef r : rest) = go (r:c) rest + go c (AddMatching g : rest) = + let add = filter (matchGlob g . fromRef) rs + in go (add ++ c) rest + go c (RemoveMatching g : rest) = + go (filter (not . matchGlob g . fromRef) c) rest diff --git a/debian/changelog b/debian/changelog index 6b5c8dff9..cdb371b73 100644 --- a/debian/changelog +++ b/debian/changelog @@ -18,6 +18,8 @@ git-annex (5.20150508.2) UNRELEASED; urgency=medium running at once. * Stale transfer lock and info files will be cleaned up automatically when get/unused/info commands are run. + * unused: Add --used option, which can specify a set of refs to consider + used, rather than the default of considering all refs used. -- Joey Hess <id@joeyh.name> Mon, 11 May 2015 12:45:06 -0400 diff --git a/doc/git-annex-unused.mdwn b/doc/git-annex-unused.mdwn index d59ef2abc..fbb371995 100644 --- a/doc/git-annex-unused.mdwn +++ b/doc/git-annex-unused.mdwn @@ -26,7 +26,37 @@ For example, to move all unused data to origin: * `--from=remote` - Check for unused data on a remote. + Check for unused data that is located on a remote. + +* `--used-refspec=+ref:-ref` + + By default, any data that the work tree uses, or that any refs in the git + repository point to is considered to be used. If you only want to use + some refs, you can use this option to specify the ones to use. Data that + is not in the specified refs (and not used by the work tree) will then be + considered unused. + +# REFSPEC + +The refspec format for --used-refspec is a colon-separated list of +additions and removals of refs. For example: + + +refs/heads/*:+HEAD^:+refs/tags/*:-refs/tags/old-tag + +This adds all refs/heads/ refs, as well as the previous version +of HEAD. It also adds all tags, except for old-tag. + +This refspec is processed by starting with an empty set of refs, +and walking the list in order from left to right. + +* Each + using a glob is matched against all relevant refs + (a subset of `git show-ref`) and all matching refs are added + to the set. + For example, "+refs/remotes/*" adds all remote refs. +* Each + without a glob adds the literal value to the set. + For example, "+HEAD^" adds "HEAD^". +* Each - is matched against the set of refs accumulated so far. + Any matching refs are removed from the set. # SEE ALSO diff --git a/doc/todo/unused_by_refspec.mdwn b/doc/todo/unused_by_refspec.mdwn index 781425264..ea84599bb 100644 --- a/doc/todo/unused_by_refspec.mdwn +++ b/doc/todo/unused_by_refspec.mdwn @@ -33,4 +33,9 @@ refspec in order. the SHAs that the refs point to, so -refs/heads/master does not remove +HEAD). +Hmm, unused currently does a separate pass to find files used in the work +tree. I think it's best to keep that as-is. + --[[Joey]] + +> [[done]] --[[Joey]] |