summaryrefslogtreecommitdiff
path: root/Command/Unused.hs
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2016-01-06 22:11:21 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2016-01-06 22:11:21 -0400
commit4a05793349c55477cde81455810bfe57444ae9bf (patch)
treecb8ef248836d8b7bd1c41b659e2ab9b1c519cd68 /Command/Unused.hs
parentd11b032bd86ebe69f1d08e382bd83370db8ea9b9 (diff)
unused: deal with v6 unlocked file that is implicitly ingested by git diff etc
Diffstat (limited to 'Command/Unused.hs')
-rw-r--r--Command/Unused.hs49
1 files changed, 38 insertions, 11 deletions
diff --git a/Command/Unused.hs b/Command/Unused.hs
index bb5d7c685..84be0eefb 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -1,6 +1,6 @@
{- git-annex command
-
- - Copyright 2010-2015 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -32,8 +32,11 @@ import Types.Key
import Types.RefSpec
import Git.Types
import Git.Sha
+import Git.FilePath
import Logs.View (is_branchView)
import Annex.BloomFilter
+import qualified Database.Keys
+import Annex.InodeSentinal
cmd :: Command
cmd = command "unused" SectionMaintenance "look for unused file content"
@@ -156,23 +159,29 @@ dropMsg' s = "\nTo remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER\
-
- Strategy:
-
- - Pass keys through 3 bloom filters in order, only creating each bloom
+ - Pass keys through these filters in order, only creating each bloom
- filter on demand if the previous one didn't filter out all keys.
-
- - 1. All keys referenced by files in the work tree.
+ - 1. Bloom filter containing all keys referenced by files in the work tree.
- This is the fastest one to build and will filter out most keys.
- - 2. All keys in the diff from the work tree to the index.
- - 3. All keys in the diffs between the index and branches matching the
- - RefSpec. (This can take quite a while).
+ - 2. Bloom filter containing all keys in the diff from the work tree to
+ - the index.
+ - 3. Associated files filter. A v6 unlocked file may have had its content
+ - added to the annex (eg, by git diff running the smudge filter)
+ - before the new key has been staged in the index. Such a key will
+ - still have an associated file.
+ - 4. Bloom filter containing all keys in the diffs between the index and
+ - branches matching the RefSpec. (This can take quite a while to build).
-}
excludeReferenced :: RefSpec -> [Key] -> Annex [Key]
-excludeReferenced refspec ks =
- runfilter withKeysReferencedM ks
- >>= runfilter withKeysReferencedDiffIndex
- >>= runfilter (withKeysReferencedDiffGitRefs refspec)
+excludeReferenced refspec ks = runbloomfilter withKeysReferencedM ks
+ >>= runbloomfilter withKeysReferencedDiffIndex
+ >>= runfilter associatedFilesFilter
+ >>= runbloomfilter (withKeysReferencedDiffGitRefs refspec)
where
runfilter _ [] = return [] -- optimisation
- runfilter a l = bloomFilter l <$> genBloomFilter a
+ runfilter a l = a l
+ runbloomfilter a = runfilter $ \l -> bloomFilter l <$> genBloomFilter a
{- Given an initial value, folds it with each key referenced by
- files in the working tree. -}
@@ -269,6 +278,24 @@ withKeysReferencedDiff a getdiff extractsha = do
(parseLinkOrPointer <$> catObject sha)
>>= maybe noop a
+{- Filters out keys that have an associated file that's not modified.
+ -
+ - A key is kept in the result when it has no recorded inode caches, or
+ - when none of its associated files passes the sameInodeCache check
+ - against those caches. A key is removed from the result as soon as one
+ - associated file passes that check; its remaining associated files are
+ - then not examined.
+ -}
+associatedFilesFilter :: [Key] -> Annex [Key]
+associatedFilesFilter = filterM go
+ where
+ go k = do
+ cs <- Database.Keys.getInodeCaches k
+ if null cs
+ then return True
+ else checkunmodified cs
+ =<< Database.Keys.getAssociatedFiles k
+ checkunmodified _ [] = return True
+ checkunmodified cs (f:fs) = do
+ relf <- fromRepo $ fromTopFilePath f
+ ifM (sameInodeCache relf cs)
+ ( return False
+ , checkunmodified cs fs
+ )
+
data UnusedMaps = UnusedMaps
{ unusedMap :: UnusedMap
, unusedBadMap :: UnusedMap