summaryrefslogtreecommitdiff
path: root/Command/Unused.hs
blob: 9fdf4cda65f440e02c0a3bfec7b3d786c2468c83 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
{- git-annex command
 -
 - Copyright 2010 Joey Hess <joey@kitenet.net>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

module Command.Unused where

import Control.Monad.State (liftIO)
import Data.List (foldl')
import qualified Data.Map as M
import Data.Maybe

import Command
import Types
import Content
import Messages
import Locations
import qualified Annex
import qualified GitRepo as Git
import qualified Backend

{- Command table entry for the "unused" subcommand, wired to seek. -}
command :: [Command]
command =
	[ Command "unused" paramNothing seek "look for unused file content"
	]

{- The only seek action of this command: start, wrapped by withNothing. -}
seek :: [CommandSeek]
seek = withNothing start : []

{- Start stage of looking for unused content in the annex: announces
 - the "unused" action and always continues on to perform. -}
start :: CommandStartNothing
start = showStart "unused" "" >> return (Just perform)

{- Perform stage: runs checkUnused, deliberately discarding its result,
 - and then hands back a follow-up action that simply returns True. -}
perform :: CommandPerform
perform = checkUnused >> return (Just (return True))

{- Looks for unused keys, numbers them starting from 1, and writes the
 - numbered list to the annexUnusedLog file, one "NUMBER KEY" line per
 - key. The file is written even when the list is empty.
 -
 - Returns True when nothing unused was found. Otherwise shows the
 - numbered list in a long note and returns False. -}
checkUnused :: Annex Bool
checkUnused = do
	showNote "checking for unused data..."
	unused <- unusedKeys
	let numbered = number 1 unused
	repo <- Annex.gitRepo
	liftIO $ writeFile (annexUnusedLog repo) $
		unlines [show n ++ " " ++ show k | (n, k) <- numbered]
	if null unused
		then return True
		else showLongNote (report numbered) >> return False
	where
		-- The displayed report: heading, one row per key, usage hints.
		report entries = unlines $ concat
			[ header
			, map row entries
			, footer
			]
		header =
			["Some annexed data is no longer pointed to by any files in the repository:",
			 "  NUMBER  KEY"]
		footer =
			["(To see where data was previously used, try: git log --stat -S'KEY')",
			 "(To remove unwanted data: git-annex dropunused NUMBER)",
			 ""]
		-- The number is left-aligned in a 6 character wide column.
		row (n, k) = "  " ++ padTo 6 (show n) ++ "  " ++ show k
		-- Right-pads with spaces; longer strings are left unchanged.
		padTo width s = s ++ replicate (width - length s) ' '

{- Pairs each element of a list with a consecutive number, counting up
 - from the given first number. -}
number :: Integer -> [a] -> [(Integer, a)]
number first = zip [first..]

{- Finds keys whose content is present, but that do not seem to be used
 - by any files in the git repo.
 -
 - The result comes from M.keys, so it is in ascending key order and
 - contains no duplicates. -}
unusedKeys :: Annex [Key]
unusedKeys = do
	present <- getKeysPresent
	referenced <- getKeysReferenced
	
	-- Constructing a single map, of the set that tends to be smaller,
	-- appears more efficient in both memory and CPU than constructing
	-- and taking the M.difference of two maps.
	let present_m = existsMap present
	let unused_m = remove referenced present_m
	return $ M.keys unused_m
	where
		-- Deletes every key of the list a from the map b.
		-- The strict foldl' evaluates each deletion before moving on
		-- to the next key; a lazy foldl would first pile up one
		-- suspended M.delete per referenced key, wasting memory and
		-- risking a stack overflow on large repositories.
		remove a b = foldl' (flip M.delete) b a

{- Builds a map that has every element of the list as a key, each
 - associated with the placeholder value 1. -}
existsMap :: Ord k => [k] -> M.Map k Int
existsMap ks = M.fromList [(k, 1) | k <- ks]

{- List of keys referenced by symlinks in the git repo.
 -
 - Every file that Git.inRepo lists for the work tree is passed to
 - Backend.lookupFile; files for which the lookup finds nothing are
 - skipped, and only the key of each successful lookup is kept. -}
getKeysReferenced :: Annex [Key]
getKeysReferenced = do
	repo <- Annex.gitRepo
	files <- liftIO $ Git.inRepo repo [Git.workTree repo]
	found <- mapM Backend.lookupFile files
	return [fst pair | Just pair <- found]