diff options
author | Joey Hess <joey@kitenet.net> | 2011-01-28 14:10:50 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2011-01-28 14:10:50 -0400 |
commit | e6da7eb1770e506661d8c6755736ed285a17554a (patch) | |
tree | 170d6a8c3a73a84f7d44de3580a774837fc8cf8f /Command/Unused.hs | |
parent | 04fe906ac6e611fd59ef44244a01e8fe61abec6f (diff) |
Improved temp file handling
* Improved temp file handling. Transfers of content can now be resumed
from temp files later; the resume does not have to be the immediate
next git-annex run.
* unused: Include partially transferred content in the list.
Diffstat (limited to 'Command/Unused.hs')
-rw-r--r-- | Command/Unused.hs | 93 |
1 files changed, 66 insertions, 27 deletions
diff --git a/Command/Unused.hs b/Command/Unused.hs index b9dc62a32..4ffa6a17f 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -7,9 +7,12 @@ module Command.Unused where +import Control.Monad (filterM, unless) import Control.Monad.State (liftIO) import qualified Data.Map as M import Data.Maybe +import System.FilePath +import System.Directory import Command import Types @@ -41,49 +44,71 @@ perform = do checkUnused :: Annex Bool checkUnused = do showNote "checking for unused data..." - unused <- unusedKeys - let list = number 1 unused + (unused, staletmp) <- unusedKeys + let unusedlist = number 0 unused + let staletmplist = number (length unused) staletmp + let list = unusedlist ++ staletmplist g <- Annex.gitRepo liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ unlines $ map (\(n, k) -> show n ++ " " ++ show k) list - if null unused - then return True - else do - showLongNote $ w list - return False + unless (null unused) $ + showLongNote $ unusedmsg unusedlist + unless (null staletmp) $ + showLongNote $ staletmpmsg staletmplist + unless (null list) $ + showLongNote $ "\n" + return $ null list + where - w u = unlines $ - ["Some annexed data is no longer pointed to by any files in the repository:", - " NUMBER KEY"] - ++ map cols u ++ - ["(To see where data was previously used, try: git log --stat -S'KEY')", - "(To remove unwanted data: git-annex dropunused NUMBER)", - ""] + unusedmsg u = unlines $ + ["Some annexed data is no longer pointed to by any files in the repository:"] + ++ table u ++ + ["(To see where data was previously used, try: git log --stat -S'KEY')"] ++ + dropmsg + staletmpmsg t = unlines $ + ["Some partially transferred data exists in temporary files:"] + ++ table t ++ dropmsg + dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"] + + table l = [" NUMBER KEY"] ++ map cols l cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k pad n s = s ++ replicate (n - length s) ' ' -number :: Integer -> [a] -> [(Integer, a)] +number :: Int -> [a] -> [(Int, a)] number _ [] = [] -number n (x:xs) = (n, x):(number (n+1) xs) +number n (x:xs) = (n+1, x):(number (n+1) xs) {- Finds keys whose content is present, but that do not seem to be used - - by any files in the git repo. -} -unusedKeys :: Annex [Key] + - by any files in the git repo, or that are only present as tmp files. -} +unusedKeys :: Annex ([Key], [Key]) unusedKeys = do + g <- Annex.gitRepo present <- getKeysPresent referenced <- getKeysReferenced - -- Constructing a single map, of the set that tends to be smaller, - -- appears more efficient in both memory and CPU than constructing - -- and taking the M.difference of two maps. - let present_m = existsMap present - let unused_m = remove referenced present_m - return $ M.keys unused_m + let unused = present `exclude` referenced + + -- Some tmp files may be dups copies of content that is fully present. + -- Simply delete those, while including the keys for the rest of + -- the temp files in the returned list for the user to deal with. + tmps <- tmpKeys + let staletmp = tmps `exclude` present + let duptmp = tmps `exclude` staletmp + _ <- liftIO $ mapM (\t -> removeFile $ gitAnnexTmpLocation g t) duptmp + + return (unused, staletmp) + where - remove a b = foldl (flip M.delete) b a + -- Constructing a single map, of the set that tends to be + -- smaller, appears more efficient in both memory and CPU + -- than constructing and taking the M.difference of two maps. + exclude [] _ = [] -- optimisation + exclude smaller larger = M.keys $ remove larger $ existsMap smaller + + existsMap :: Ord k => [k] -> M.Map k Int + existsMap l = M.fromList $ map (\k -> (k, 1)) l -existsMap :: Ord k => [k] -> M.Map k Int -existsMap l = M.fromList $ map (\k -> (k, 1)) l + remove a b = foldl (flip M.delete) b a {- List of keys referenced by symlinks in the git repo. -} getKeysReferenced :: Annex [Key] @@ -92,3 +117,17 @@ getKeysReferenced = do files <- liftIO $ Git.inRepo g [Git.workTree g] keypairs <- mapM Backend.lookupFile files return $ map fst $ catMaybes keypairs + +{- List of keys that have temp files in the git repo. -} +tmpKeys :: Annex [Key] +tmpKeys = do + g <- Annex.gitRepo + let tmp = gitAnnexTmpDir g + exists <- liftIO $ doesDirectoryExist tmp + if (not exists) + then return [] + else do + contents <- liftIO $ getDirectoryContents tmp + files <- liftIO $ filterM doesFileExist $ + map (tmp </>) contents + return $ map (fileKey . takeFileName) files |