summaryrefslogtreecommitdiff
path: root/Command/Unused.hs
blob: 4ffa6a17f89842c7b9806b5de2b3c10103451d0e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
{- git-annex command
 -
 - Copyright 2010 Joey Hess <joey@kitenet.net>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

module Command.Unused where

import Control.Monad (filterM, unless)
import Control.Monad.State (liftIO)
import qualified Data.Map as M
import Data.Maybe
import System.FilePath
import System.Directory

import Command
import Types
import Content
import Messages
import Locations
import Utility
import qualified Annex
import qualified GitRepo as Git
import qualified Backend

command :: [Command]
command = [Command "unused" paramNothing seek "look for unused file content"]

seek :: [CommandSeek]
seek = [withNothing start]

{- Finds unused content in the annex. -} 
start :: CommandStartNothing
start = do
	showStart "unused" ""
	return $ Just perform

perform :: CommandPerform
perform = do
	_ <- checkUnused
	return $ Just $ return True

checkUnused :: Annex Bool
checkUnused = do
	showNote "checking for unused data..."
	(unused, staletmp) <- unusedKeys
	let unusedlist = number 0 unused
	let staletmplist = number (length unused) staletmp
	let list = unusedlist ++ staletmplist
	g <- Annex.gitRepo
	liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ unlines $ 
		map (\(n, k) -> show n ++ " " ++ show k) list
	unless (null unused) $
		showLongNote $ unusedmsg unusedlist
	unless (null staletmp) $
		showLongNote $ staletmpmsg staletmplist
	unless (null list) $
		showLongNote $ "\n"
	return $ null list

	where
		unusedmsg u = unlines $
			["Some annexed data is no longer pointed to by any files in the repository:"]
			++ table u ++
			["(To see where data was previously used, try: git log --stat -S'KEY')"] ++
			dropmsg
		staletmpmsg t = unlines $ 
			["Some partially transferred data exists in temporary files:"]
			++ table t ++ dropmsg
		dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"]
			
		table l = ["  NUMBER  KEY"] ++ map cols l
		cols (n,k) = "  " ++ pad 6 (show n) ++ "  " ++ show k
		pad n s = s ++ replicate (n - length s) ' '

number :: Int -> [a] -> [(Int, a)]
number _ [] = []
number n (x:xs) = (n+1, x):(number (n+1) xs)

{- Finds keys whose content is present, but that do not seem to be used
 - by any files in the git repo, or that are only present as tmp files. -}
unusedKeys :: Annex ([Key], [Key])
unusedKeys = do
	g <- Annex.gitRepo
	present <- getKeysPresent
	referenced <- getKeysReferenced
	
	let unused = present `exclude` referenced

	-- Some tmp files may be dups copies of content that is fully present.
	-- Simply delete those, while including the keys for the rest of
	-- the temp files in the returned list for the user to deal with.
	tmps <- tmpKeys
	let staletmp = tmps `exclude` present
	let duptmp = tmps `exclude` staletmp
	_ <- liftIO $ mapM (\t -> removeFile $ gitAnnexTmpLocation g t) duptmp

	return (unused, staletmp)

	where
		-- Constructing a single map, of the set that tends to be
		-- smaller, appears more efficient in both memory and CPU
		-- than constructing and taking the M.difference of two maps.
		exclude [] _ = [] -- optimisation
		exclude smaller larger = M.keys $ remove larger $ existsMap smaller

		existsMap :: Ord k => [k] -> M.Map k Int
		existsMap l = M.fromList $ map (\k -> (k, 1)) l

		remove a b = foldl (flip M.delete) b a

{- List of keys referenced by symlinks in the git repo. -}
getKeysReferenced :: Annex [Key]
getKeysReferenced = do
	g <- Annex.gitRepo
	files <- liftIO $ Git.inRepo g [Git.workTree g]
	keypairs <- mapM Backend.lookupFile files
	return $ map fst $ catMaybes keypairs

{- List of keys that have temp files in the git repo. -}
tmpKeys :: Annex [Key]
tmpKeys = do
	g <- Annex.gitRepo
	let tmp = gitAnnexTmpDir g
	exists <- liftIO $ doesDirectoryExist tmp
	if (not exists)
		then return []
		else do
			contents <- liftIO $ getDirectoryContents tmp
			files <- liftIO $ filterM doesFileExist $
				map (tmp </>) contents
			return $ map (fileKey . takeFileName) files