1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
{- git-annex command
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.Unused where
import Control.Monad (filterM, unless, forM_)
import Control.Monad.State (liftIO)
import qualified Data.Set as S
import Data.Maybe
import System.FilePath
import System.Directory
import Command
import Types
import Content
import Messages
import Locations
import Utility
import qualified Annex
import qualified GitRepo as Git
import qualified Backend
command :: [Command]
command = [repoCommand "unused" paramNothing seek
"look for unused file content"]
seek :: [CommandSeek]
seek = [withNothing start]
{- Finds unused content in the annex. -}
start :: CommandStartNothing
start = notBareRepo $ do
showStart "unused" ""
return $ Just perform
perform :: CommandPerform
perform = do
_ <- checkUnused
return $ Just $ return True
checkUnused :: Annex Bool
checkUnused = do
(unused, staletmp) <- unusedKeys
let unusedlist = number 0 unused
let staletmplist = number (length unused) staletmp
let list = unusedlist ++ staletmplist
g <- Annex.gitRepo
liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ unlines $
map (\(n, k) -> show n ++ " " ++ show k) list
unless (null unused) $ showLongNote $ unusedmsg unusedlist
unless (null staletmp) $ showLongNote $ staletmpmsg staletmplist
unless (null list) $ showLongNote $ "\n"
return $ null list
where
unusedmsg u = unlines $
["Some annexed data is no longer pointed to by any files in the repository:"]
++ table u ++
["(To see where data was previously used, try: git log --stat -S'KEY')"] ++
dropmsg
staletmpmsg t = unlines $
["Some partially transferred data exists in temporary files:"]
++ table t ++ dropmsg
dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"]
table l = [" NUMBER KEY"] ++ map cols l
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
pad n s = s ++ replicate (n - length s) ' '
number :: Int -> [a] -> [(Int, a)]
number _ [] = []
number n (x:xs) = (n+1, x):(number (n+1) xs)
{- Finds keys whose content is present, but that do not seem to be used
- by any files in the git repo, or that are only present as tmp files. -}
unusedKeys :: Annex ([Key], [Key])
unusedKeys = do
g <- Annex.gitRepo
fast <- Annex.getState Annex.fast
if fast
then do
showNote "fast mode enabled; only finding temporary files"
tmps <- tmpKeys
return ([], tmps)
else do
showNote "checking for unused data..."
present <- getKeysPresent
referenced <- getKeysReferenced
tmps <- tmpKeys
let (unused, staletmp, duptmp) = calcUnusedKeys present referenced tmps
-- Tmp files that are dups of content already present
-- can simply be removed.
liftIO $ forM_ duptmp $ \t -> removeFile $
gitAnnexTmpLocation g t
return (unused, staletmp)
calcUnusedKeys :: [Key] -> [Key] -> [Key] -> ([Key], [Key], [Key])
calcUnusedKeys present referenced tmps = (unused, staletmp, duptmp)
where
unused = present `exclude` referenced
staletmp = tmps `exclude` present
duptmp = tmps `exclude` staletmp
-- Constructing a single set, of the list that tends to be
-- smaller, appears more efficient in both memory and CPU
-- than constructing and taking the S.difference of two sets.
exclude [] _ = [] -- optimisation
exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
remove a b = foldl (flip S.delete) b a
{- List of keys referenced by symlinks in the git repo. -}
getKeysReferenced :: Annex [Key]
getKeysReferenced = do
g <- Annex.gitRepo
files <- liftIO $ Git.inRepo g [Git.workTree g]
keypairs <- mapM Backend.lookupFile files
return $ map fst $ catMaybes keypairs
{- List of keys that have temp files in the git repo. -}
tmpKeys :: Annex [Key]
tmpKeys = do
g <- Annex.gitRepo
let tmp = gitAnnexTmpDir g
exists <- liftIO $ doesDirectoryExist tmp
if (not exists)
then return []
else do
contents <- liftIO $ getDirectoryContents tmp
files <- liftIO $ filterM doesFileExist $
map (tmp </>) contents
return $ catMaybes $ map (fileKey . takeFileName) files
|