summaryrefslogtreecommitdiff
path: root/Command/Unused.hs
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-01-28 14:10:50 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-01-28 14:10:50 -0400
commite6da7eb1770e506661d8c6755736ed285a17554a (patch)
tree170d6a8c3a73a84f7d44de3580a774837fc8cf8f /Command/Unused.hs
parent04fe906ac6e611fd59ef44244a01e8fe61abec6f (diff)
Improved temp file handling
* Improved temp file handling. Transfers of content can now be resumed from temp files later; the resume does not have to be the immediate next git-annex run. * unused: Include partially transferred content in the list.
Diffstat (limited to 'Command/Unused.hs')
-rw-r--r--Command/Unused.hs93
1 files changed, 66 insertions, 27 deletions
diff --git a/Command/Unused.hs b/Command/Unused.hs
index b9dc62a32..4ffa6a17f 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -7,9 +7,12 @@
module Command.Unused where
+import Control.Monad (filterM, unless)
import Control.Monad.State (liftIO)
import qualified Data.Map as M
import Data.Maybe
+import System.FilePath
+import System.Directory
import Command
import Types
@@ -41,49 +44,71 @@ perform = do
checkUnused :: Annex Bool
checkUnused = do
showNote "checking for unused data..."
- unused <- unusedKeys
- let list = number 1 unused
+ (unused, staletmp) <- unusedKeys
+ let unusedlist = number 0 unused
+ let staletmplist = number (length unused) staletmp
+ let list = unusedlist ++ staletmplist
g <- Annex.gitRepo
liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ unlines $
map (\(n, k) -> show n ++ " " ++ show k) list
- if null unused
- then return True
- else do
- showLongNote $ w list
- return False
+ unless (null unused) $
+ showLongNote $ unusedmsg unusedlist
+ unless (null staletmp) $
+ showLongNote $ staletmpmsg staletmplist
+ unless (null list) $
+ showLongNote $ "\n"
+ return $ null list
+
where
- w u = unlines $
- ["Some annexed data is no longer pointed to by any files in the repository:",
- " NUMBER KEY"]
- ++ map cols u ++
- ["(To see where data was previously used, try: git log --stat -S'KEY')",
- "(To remove unwanted data: git-annex dropunused NUMBER)",
- ""]
+ unusedmsg u = unlines $
+ ["Some annexed data is no longer pointed to by any files in the repository:"]
+ ++ table u ++
+ ["(To see where data was previously used, try: git log --stat -S'KEY')"] ++
+ dropmsg
+ staletmpmsg t = unlines $
+ ["Some partially transferred data exists in temporary files:"]
+ ++ table t ++ dropmsg
+ dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"]
+
+ table l = [" NUMBER KEY"] ++ map cols l
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
pad n s = s ++ replicate (n - length s) ' '
-number :: Integer -> [a] -> [(Integer, a)]
+number :: Int -> [a] -> [(Int, a)]
number _ [] = []
-number n (x:xs) = (n, x):(number (n+1) xs)
+number n (x:xs) = (n+1, x):(number (n+1) xs)
{- Finds keys whose content is present, but that do not seem to be used
- - by any files in the git repo. -}
-unusedKeys :: Annex [Key]
+ - by any files in the git repo, or that are only present as tmp files. -}
+unusedKeys :: Annex ([Key], [Key])
unusedKeys = do
+ g <- Annex.gitRepo
present <- getKeysPresent
referenced <- getKeysReferenced
- -- Constructing a single map, of the set that tends to be smaller,
- -- appears more efficient in both memory and CPU than constructing
- -- and taking the M.difference of two maps.
- let present_m = existsMap present
- let unused_m = remove referenced present_m
- return $ M.keys unused_m
+ let unused = present `exclude` referenced
+
+ -- Some tmp files may be dups copies of content that is fully present.
+ -- Simply delete those, while including the keys for the rest of
+ -- the temp files in the returned list for the user to deal with.
+ tmps <- tmpKeys
+ let staletmp = tmps `exclude` present
+ let duptmp = tmps `exclude` staletmp
+ _ <- liftIO $ mapM (\t -> removeFile $ gitAnnexTmpLocation g t) duptmp
+
+ return (unused, staletmp)
+
where
- remove a b = foldl (flip M.delete) b a
+ -- Constructing a single map, of the set that tends to be
+ -- smaller, appears more efficient in both memory and CPU
+ -- than constructing and taking the M.difference of two maps.
+ exclude [] _ = [] -- optimisation
+ exclude smaller larger = M.keys $ remove larger $ existsMap smaller
+
+ existsMap :: Ord k => [k] -> M.Map k Int
+ existsMap l = M.fromList $ map (\k -> (k, 1)) l
-existsMap :: Ord k => [k] -> M.Map k Int
-existsMap l = M.fromList $ map (\k -> (k, 1)) l
+ remove a b = foldl (flip M.delete) b a
{- List of keys referenced by symlinks in the git repo. -}
getKeysReferenced :: Annex [Key]
@@ -92,3 +117,17 @@ getKeysReferenced = do
files <- liftIO $ Git.inRepo g [Git.workTree g]
keypairs <- mapM Backend.lookupFile files
return $ map fst $ catMaybes keypairs
+
+{- List of keys that have temp files in the git repo. -}
+tmpKeys :: Annex [Key]
+tmpKeys = do
+ g <- Annex.gitRepo
+ let tmp = gitAnnexTmpDir g
+ exists <- liftIO $ doesDirectoryExist tmp
+ if (not exists)
+ then return []
+ else do
+ contents <- liftIO $ getDirectoryContents tmp
+ files <- liftIO $ filterM doesFileExist $
+ map (tmp </>) contents
+ return $ map (fileKey . takeFileName) files