diff options
author | Joey Hess <joeyh@joeyh.name> | 2016-01-06 22:11:21 -0400 |
---|---|---|
committer | Joey Hess <joeyh@joeyh.name> | 2016-01-06 22:11:21 -0400 |
commit | 4a05793349c55477cde81455810bfe57444ae9bf (patch) | |
tree | cb8ef248836d8b7bd1c41b659e2ab9b1c519cd68 /Command | |
parent | d11b032bd86ebe69f1d08e382bd83370db8ea9b9 (diff) |
unused: deal with v6 unlocked file that is implicitly ingested by git diff etc
Diffstat (limited to 'Command')
-rw-r--r-- | Command/Unused.hs | 49 |
1 file changed, 38 insertions, 11 deletions
```diff
diff --git a/Command/Unused.hs b/Command/Unused.hs
index bb5d7c685..84be0eefb 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -1,6 +1,6 @@
 {- git-annex command
  -
- - Copyright 2010-2015 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2016 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU GPL version 3 or higher.
  -}
@@ -32,8 +32,11 @@ import Types.Key
 import Types.RefSpec
 import Git.Types
 import Git.Sha
+import Git.FilePath
 import Logs.View (is_branchView)
 import Annex.BloomFilter
+import qualified Database.Keys
+import Annex.InodeSentinal
 
 cmd :: Command
 cmd = command "unused" SectionMaintenance "look for unused file content"
@@ -156,23 +159,29 @@ dropMsg' s = "\nTo remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER\
  -
  - Strategy:
  -
- - Pass keys through 3 bloom filters in order, only creating each bloom
+ - Pass keys through these filters in order, only creating each bloom
  - filter on demand if the previous one didn't filter out all keys.
  -
- - 1. All keys referenced by files in the work tree.
+ - 1. Bloom filter containing all keys referenced by files in the work tree.
  -    This is the fastest one to build and will filter out most keys.
- - 2. All keys in the diff from the work tree to the index.
- - 3. All keys in the diffs between the index and branches matching the
- -    RefSpec. (This can take quite a while).
+ - 2. Bloom filter containing all keys in the diff from the work tree to
+ -    the index.
+ - 3. Associated files filter. A v6 unlocked file may have had its content
+ -    added to the annex (by eg, git diff running the smudge filter),
+ -    but the new key is not yet staged in the index. But if so, it will
+ -    have an associated file.
+ - 4. Bloom filter containing all keys in the diffs between the index and
+ -    branches matching the RefSpec. (This can take quite a while to build).
  -}
 excludeReferenced :: RefSpec -> [Key] -> Annex [Key]
-excludeReferenced refspec ks =
-	runfilter withKeysReferencedM ks
-		>>= runfilter withKeysReferencedDiffIndex
-		>>= runfilter (withKeysReferencedDiffGitRefs refspec)
+excludeReferenced refspec ks = runbloomfilter withKeysReferencedM ks
+	>>= runbloomfilter withKeysReferencedDiffIndex
+	>>= runfilter associatedFilesFilter
+	>>= runbloomfilter (withKeysReferencedDiffGitRefs refspec)
   where
 	runfilter _ [] = return [] -- optimisation
-	runfilter a l = bloomFilter l <$> genBloomFilter a
+	runfilter a l = a l
+	runbloomfilter a = runfilter $ \l -> bloomFilter l <$> genBloomFilter a
 
 {- Given an initial value, folds it with each key referenced by
  - files in the working tree. -}
@@ -269,6 +278,24 @@ withKeysReferencedDiff a getdiff extractsha = do
 			(parseLinkOrPointer <$> catObject sha)
 				>>= maybe noop a
 
+{- Filters out keys that have an associated file that's not modified. -}
+associatedFilesFilter :: [Key] -> Annex [Key]
+associatedFilesFilter = filterM go
+  where
+	go k = do
+		cs <- Database.Keys.getInodeCaches k
+		if null cs
+			then return True
+			else checkunmodified cs
+				=<< Database.Keys.getAssociatedFiles k
+	checkunmodified _ [] = return True
+	checkunmodified cs (f:fs) = do
+		relf <- fromRepo $ fromTopFilePath f
+		ifM (sameInodeCache relf cs)
+			( return False
+			, checkunmodified cs fs
+			)
+
 data UnusedMaps = UnusedMaps
 	{ unusedMap :: UnusedMap
 	, unusedBadMap :: UnusedMap
```