summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Command/Unused.hs49
-rw-r--r--Test.hs52
2 files changed, 78 insertions, 23 deletions
diff --git a/Command/Unused.hs b/Command/Unused.hs
index bb5d7c685..84be0eefb 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -1,6 +1,6 @@
{- git-annex command
-
- - Copyright 2010-2015 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -32,8 +32,11 @@ import Types.Key
import Types.RefSpec
import Git.Types
import Git.Sha
+import Git.FilePath
import Logs.View (is_branchView)
import Annex.BloomFilter
+import qualified Database.Keys
+import Annex.InodeSentinal
cmd :: Command
cmd = command "unused" SectionMaintenance "look for unused file content"
@@ -156,23 +159,29 @@ dropMsg' s = "\nTo remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER\
-
- Strategy:
-
- - Pass keys through 3 bloom filters in order, only creating each bloom
+ - Pass keys through these filters in order, only creating each bloom
- filter on demand if the previous one didn't filter out all keys.
-
- - 1. All keys referenced by files in the work tree.
+ - 1. Bloom filter containing all keys referenced by files in the work tree.
- This is the fastest one to build and will filter out most keys.
- - 2. All keys in the diff from the work tree to the index.
- - 3. All keys in the diffs between the index and branches matching the
- - RefSpec. (This can take quite a while).
+ - 2. Bloom filter containing all keys in the diff from the work tree to
+ - the index.
+ - 3. Associated files filter. A v6 unlocked file may have had its content
+ - added to the annex (by eg, git diff running the smudge filter),
+ - but the new key is not yet staged in the index. But if so, it will
+ - have an associated file.
+ - 4. Bloom filter containing all keys in the diffs between the index and
+ - branches matching the RefSpec. (This can take quite a while to build).
-}
excludeReferenced :: RefSpec -> [Key] -> Annex [Key]
-excludeReferenced refspec ks =
- runfilter withKeysReferencedM ks
- >>= runfilter withKeysReferencedDiffIndex
- >>= runfilter (withKeysReferencedDiffGitRefs refspec)
+excludeReferenced refspec ks = runbloomfilter withKeysReferencedM ks
+ >>= runbloomfilter withKeysReferencedDiffIndex
+ >>= runfilter associatedFilesFilter
+ >>= runbloomfilter (withKeysReferencedDiffGitRefs refspec)
where
runfilter _ [] = return [] -- optimisation
- runfilter a l = bloomFilter l <$> genBloomFilter a
+ runfilter a l = a l
+ runbloomfilter a = runfilter $ \l -> bloomFilter l <$> genBloomFilter a
{- Given an initial value, folds it with each key referenced by
- files in the working tree. -}
@@ -269,6 +278,24 @@ withKeysReferencedDiff a getdiff extractsha = do
(parseLinkOrPointer <$> catObject sha)
>>= maybe noop a
+{- Filters out keys that have an associated file that's not modified. -}
+associatedFilesFilter :: [Key] -> Annex [Key]
+associatedFilesFilter = filterM go
+ where
+ go k = do
+ cs <- Database.Keys.getInodeCaches k
+ if null cs
+ then return True
+ else checkunmodified cs
+ =<< Database.Keys.getAssociatedFiles k
+ checkunmodified _ [] = return True
+ checkunmodified cs (f:fs) = do
+ relf <- fromRepo $ fromTopFilePath f
+ ifM (sameInodeCache relf cs)
+ ( return False
+ , checkunmodified cs fs
+ )
+
data UnusedMaps = UnusedMaps
{ unusedMap :: UnusedMap
, unusedBadMap :: UnusedMap
diff --git a/Test.hs b/Test.hs
index ba491f3d8..9f7c7e328 100644
--- a/Test.hs
+++ b/Test.hs
@@ -1,6 +1,6 @@
{- git-annex test suite
-
- - Copyright 2010-2015 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -369,9 +369,7 @@ test_reinject = intmpclonerepoInDirect $ do
git_annex "drop" ["--force", sha1annexedfile] @? "drop failed"
annexed_notpresent sha1annexedfile
writeFile tmp $ content sha1annexedfile
- r <- annexeval $ Types.Backend.getKey backendSHA1
- Types.KeySource.KeySource { Types.KeySource.keyFilename = tmp, Types.KeySource.contentLocation = tmp, Types.KeySource.inodeCache = Nothing }
- let key = Types.Key.key2file $ fromJust r
+ key <- Types.Key.key2file <$> getKey backendSHA1 tmp
git_annex "reinject" [tmp, sha1annexedfile] @? "reinject failed"
annexed_present sha1annexedfile
git_annex "fromkey" [key, sha1annexedfiledup] @? "fromkey failed for dup"
@@ -789,11 +787,10 @@ test_unused :: Assertion
-- This test is broken in direct mode
test_unused = intmpclonerepoInDirect $ do
checkunused [] "in new clone"
- -- keys have to be looked up before files are removed
- annexedfilekey <- annexeval $ findkey annexedfile
- sha1annexedfilekey <- annexeval $ findkey sha1annexedfile
git_annex "get" [annexedfile] @? "get of file failed"
git_annex "get" [sha1annexedfile] @? "get of file failed"
+ annexedfilekey <- getKey backendSHA256E annexedfile
+ sha1annexedfilekey <- getKey backendSHA1 sha1annexedfile
checkunused [] "after get"
boolSystem "git" [Param "rm", Param "-fq", File annexedfile] @? "git rm failed"
checkunused [] "after rm"
@@ -820,7 +817,7 @@ test_unused = intmpclonerepoInDirect $ do
-- and pointed at annexed content, and think that content was unused
writeFile "unusedfile" "unusedcontent"
git_annex "add" ["unusedfile"] @? "add of unusedfile failed"
- unusedfilekey <- annexeval $ findkey "unusedfile"
+ unusedfilekey <- getKey backendSHA256E "unusedfile"
renameFile "unusedfile" "unusedunstagedfile"
boolSystem "git" [Param "rm", Param "-qf", File "unusedfile"] @? "git rm failed"
checkunused [] "with unstaged link"
@@ -832,7 +829,7 @@ test_unused = intmpclonerepoInDirect $ do
writeFile "unusedfile" "unusedcontent"
git_annex "add" ["unusedfile"] @? "add of unusedfile failed"
boolSystem "git" [Param "add", File "unusedfile"] @? "git add failed"
- unusedfilekey' <- annexeval $ findkey "unusedfile"
+ unusedfilekey' <- getKey backendSHA256E "unusedfile"
checkunused [] "with staged deleted link"
boolSystem "git" [Param "rm", Param "-qf", File "unusedfile"] @? "git rm failed"
checkunused [unusedfilekey'] "with staged link deleted"
@@ -846,6 +843,27 @@ test_unused = intmpclonerepoInDirect $ do
removeFile "unusedfile"
checkunused [] "with staged deleted file"
+ -- When an unlocked file is modified, git diff will cause git-annex
+ -- to add its content to the repository. Make sure that's not
+ -- found as unused.
+ whenM (unlockedFiles <$> getTestMode) $ do
+ let f = "unlockedfile"
+ writeFile f "unlockedcontent1"
+ boolSystem "git" [Param "add", File "unlockedfile"] @? "git add failed"
+ checkunused [] "with unlocked file before modification"
+ writeFile f "unlockedcontent2"
+ checkunused [] "with unlocked file after modification"
+ not <$> boolSystem "git" [Param "diff", Param "--quiet", File f] @? "git diff did not show changes to unlocked file"
+ ver2key <- getKey backendSHA256E "unlockedfile"
+ -- still nothing unused because one version is in the index
+ -- and the other is in the work tree
+ checkunused [] "with unlocked file after git diff"
+ writeFile f "unlockedcontent3"
+ -- original version is still in index; version 2 is unused
+ -- now, and version 3 is in work tree
+ checkunused [ver2key] "with unlocked file after second modification"
+ not <$> boolSystem "git" [Param "diff", Param "--quiet", File f] @? "git diff did not show changes to unlocked file"
+ checkunused [ver2key] "with unlocked file after second git diff"
where
checkunused expectedkeys desc = do
git_annex "unused" [] @? "unused failed"
@@ -853,9 +871,6 @@ test_unused = intmpclonerepoInDirect $ do
let unusedkeys = M.elems unusedmap
assertEqual ("unused keys differ " ++ desc)
(sort expectedkeys) (sort unusedkeys)
- findkey f = do
- r <- Annex.WorkTree.lookupFile f
- return $ fromJust r
test_describe :: Assertion
test_describe = intmpclonerepo $ do
@@ -1976,10 +1991,23 @@ backendSHA1 = backend_ "SHA1"
backendSHA256 :: Types.Backend
backendSHA256 = backend_ "SHA256"
+backendSHA256E :: Types.Backend
+backendSHA256E = backend_ "SHA256E"
+
backendWORM :: Types.Backend
backendWORM = backend_ "WORM"
backend_ :: String -> Types.Backend
backend_ = Backend.lookupBackendName
+getKey :: Types.Backend -> FilePath -> IO Types.Key
+getKey b f = fromJust <$> annexeval go
+ where
+ go = Types.Backend.getKey b
+ Types.KeySource.KeySource
+ { Types.KeySource.keyFilename = f
+ , Types.KeySource.contentLocation = f
+ , Types.KeySource.inodeCache = Nothing
+ }
+
#endif