diff options
author | Joey Hess <joey@kitenet.net> | 2011-04-29 13:59:00 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2011-04-29 13:59:00 -0400 |
commit | 43f0a666f0f6cc152a2b778921831d6d7daedcaf (patch) | |
tree | bd65e820843c23677131f29517064f543683d766 | |
parent | 49efc6c39928baec03d7dd0d5cb37f346432f1d3 (diff) |
unused: Now also lists files fsck places in .git/annex/bad/
-rw-r--r-- | Command/DropUnused.hs | 76 | ||||
-rw-r--r-- | Command/Unused.hs | 100 | ||||
-rw-r--r-- | Locations.hs | 11 | ||||
-rw-r--r-- | debian/changelog | 1 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 2 |
5 files changed, 113 insertions, 77 deletions
diff --git a/Command/DropUnused.hs b/Command/DropUnused.hs index 1eec68820..b129235e1 100644 --- a/Command/DropUnused.hs +++ b/Command/DropUnused.hs @@ -21,54 +21,66 @@ import qualified Annex import qualified Command.Drop import qualified Command.Move import qualified Remote +import qualified GitRepo as Git import Backend import Key +type UnusedMap = M.Map String Key + command :: [Command] command = [repoCommand "dropunused" (paramRepeating paramNumber) seek "drop unused file content"] seek :: [CommandSeek] -seek = [withUnusedMap] +seek = [withUnusedMaps] -{- Read unusedlog once, and pass the map to each start action. -} -withUnusedMap :: CommandSeek -withUnusedMap params = do - m <- readUnusedLog - return $ map (start m) params +{- Read unused logs once, and pass the maps to each start action. -} +withUnusedMaps :: CommandSeek +withUnusedMaps params = do + unused <- readUnusedLog "" + unusedbad <- readUnusedLog "bad" + unusedtmp <- readUnusedLog "tmp" + return $ map (start (unused, unusedbad, unusedtmp)) params -start :: M.Map String Key -> CommandStartString -start m s = notBareRepo $ do - case M.lookup s m of - Nothing -> return Nothing - Just key -> do - showStart "dropunused" s - from <- Annex.getState Annex.fromremote - case from of - Just name -> do - r <- Remote.byName name - return $ Just $ performRemote r key - _ -> return $ Just $ perform key +start :: (UnusedMap, UnusedMap, UnusedMap) -> CommandStartString +start (unused, unusedbad, unusedtmp) s = notBareRepo $ search + [ (unused, perform) + , (unusedbad, performOther gitAnnexBadLocation) + , (unusedtmp, performOther gitAnnexTmpLocation) + ] + where + search [] = return Nothing + search ((m, a):rest) = do + case M.lookup s m of + Nothing -> search rest + Just key -> do + showStart "dropunused" s + return $ Just $ a key -{- drop both content in the backend and any tmp file for the key -} perform :: Key -> CommandPerform perform key = do - g <- Annex.gitRepo - let tmp = gitAnnexTmpLocation g key - tmp_exists <- liftIO $ doesFileExist tmp - when tmp_exists $ liftIO $ removeFile tmp - backend <- keyBackend key - Command.Drop.perform key backend (Just 0) -- force drop + from <- Annex.getState Annex.fromremote + case from of + Just name -> do + r <- Remote.byName name + showNote $ "from " ++ Remote.name r ++ "..." + return $ Just $ Command.Move.fromCleanup r True key + _ -> do + backend <- keyBackend key + Command.Drop.perform key backend (Just 0) -- force drop -performRemote :: Remote.Remote Annex -> Key -> CommandPerform -performRemote r key = do - showNote $ "from " ++ Remote.name r ++ "..." - return $ Just $ Command.Move.fromCleanup r True key +performOther :: (Git.Repo -> Key -> FilePath) -> Key -> CommandPerform +performOther filespec key = do + g <- Annex.gitRepo + let f = filespec g key + e <- liftIO $ doesFileExist f + when e $ liftIO $ removeFile f + return $ Just $ return True -readUnusedLog :: Annex (M.Map String Key) -readUnusedLog = do +readUnusedLog :: FilePath -> Annex UnusedMap +readUnusedLog prefix = do g <- Annex.gitRepo - let f = gitAnnexUnusedLog g + let f = gitAnnexUnusedLog prefix g e <- liftIO $ doesFileExist f if e then do diff --git a/Command/Unused.hs b/Command/Unused.hs index a3fb6fe23..67f10581d 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -7,7 +7,7 @@ module Command.Unused where -import Control.Monad (filterM, unless, forM_) +import Control.Monad (filterM, unless, forM_, when) import Control.Monad.State (liftIO) import qualified Data.Set as S import Data.Maybe @@ -51,14 +51,17 @@ perform = do checkUnused :: Annex () checkUnused = do - (unused, staletmp) <- unusedKeys - let unusedlist = number 0 unused - let staletmplist = number (length unused) staletmp - let list = unusedlist ++ staletmplist - writeUnusedFile list - unless (null unused) $ showLongNote $ unusedMsg unusedlist - unless (null staletmp) $ showLongNote $ staleTmpMsg staletmplist - unless (null list) $ showLongNote $ "\n" + (unused, stalebad, staletmp) <- unusedKeys + n <- list "" unusedMsg unused 0 + n' <- list "bad" staleBadMsg stalebad n + _ <- list "tmp" staleTmpMsg staletmp n' + return () + where + list file msg l c = do + let unusedlist = number c l + when (not $ null l) $ showLongNote $ msg unusedlist + writeUnusedFile file unusedlist + return $ length l checkRemoteUnused :: Remote.Remote Annex -> Annex () checkRemoteUnused r = do @@ -69,7 +72,7 @@ checkRemoteUnused r = do remotehas <- filterM isthere logged let remoteunused = remotehas `exclude` referenced let list = number 0 remoteunused - writeUnusedFile list + writeUnusedFile "" list unless (null remoteunused) $ do showLongNote $ remoteUnusedMsg r list showLongNote $ "\n" @@ -80,10 +83,10 @@ checkRemoteUnused r = do return $ uuid `elem` us uuid = Remote.uuid r -writeUnusedFile :: [(Int, Key)] -> Annex () -writeUnusedFile l = do +writeUnusedFile :: FilePath -> [(Int, Key)] -> Annex () +writeUnusedFile prefix l = do g <- Annex.gitRepo - liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ + liftIO $ safeWriteFile (gitAnnexUnusedLog prefix g) $ unlines $ map (\(n, k) -> show n ++ " " ++ show k) l table :: [(Int, Key)] -> [String] @@ -100,7 +103,12 @@ staleTmpMsg :: [(Int, Key)] -> String staleTmpMsg t = unlines $ ["Some partially transferred data exists in temporary files:"] ++ table t ++ [dropMsg Nothing] - + +staleBadMsg :: [(Int, Key)] -> String +staleBadMsg t = unlines $ + ["Some corrupted files have been preserved by fsck, just in case:"] + ++ table t ++ [dropMsg Nothing] + unusedMsg :: [(Int, Key)] -> String unusedMsg u = unusedMsg' u ["Some annexed data is no longer used by any files in the repository:"] @@ -127,36 +135,28 @@ dropMsg :: Maybe (Remote.Remote Annex) -> String dropMsg Nothing = dropMsg' "" dropMsg (Just r) = dropMsg' $ " --from " ++ Remote.name r dropMsg' :: String -> String -dropMsg' s = "(To remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER)" +dropMsg' s = "(To remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER)\n" {- Finds keys whose content is present, but that do not seem to be used - - by any files in the git repo, or that are only present as tmp files. -} -unusedKeys :: Annex ([Key], [Key]) + - by any files in the git repo, or that are only present as bad or tmp + - files. -} +unusedKeys :: Annex ([Key], [Key], [Key]) unusedKeys = do - g <- Annex.gitRepo - fast <- Annex.getState Annex.fast if fast then do - showNote "fast mode enabled; only finding temporary files" - tmps <- tmpKeys - return ([], tmps) + showNote "fast mode enabled; only finding stale files" + tmp <- staleKeys' gitAnnexTmpDir + bad <- staleKeys' gitAnnexBadDir + return ([], bad, tmp) else do showNote "checking for unused data..." present <- getKeysPresent referenced <- getKeysReferenced - tmps <- tmpKeys - let unused = present `exclude` referenced - let staletmp = tmps `exclude` present - let duptmp = tmps `exclude` staletmp - - -- Tmp files that are dups of content already present - -- can simply be removed. - liftIO $ forM_ duptmp $ \t -> removeFile $ - gitAnnexTmpLocation g t - - return (unused, staletmp) + staletmp <- staleKeys gitAnnexTmpDir present + stalebad <- staleKeys gitAnnexBadDir present + return (unused, stalebad, staletmp) {- Finds items in the first, smaller list, that are not - present in the second, larger list. @@ -178,16 +178,34 @@ getKeysReferenced = do keypairs <- mapM Backend.lookupFile files return $ map fst $ catMaybes keypairs -{- List of keys that have temp files in the git repo. -} -tmpKeys :: Annex [Key] -tmpKeys = do +{- Looks in the specified directory for bad/tmp keys, and returns a list + - of those that might still have value, or might be stale and removable. + - + - When a list of presently available keys is provided, stale keys + - that no longer have value are deleted. + -} +staleKeys :: (Git.Repo -> FilePath) -> [Key] -> Annex [Key] +staleKeys dirspec present = do + contents <- staleKeys' dirspec + + let stale = contents `exclude` present + let dup = contents `exclude` stale + + g <- Annex.gitRepo + let dir = dirspec g + liftIO $ forM_ dup $ \t -> removeFile $ dir </> keyFile t + + return stale + +staleKeys' :: (Git.Repo -> FilePath) -> Annex [Key] +staleKeys' dirspec = do g <- Annex.gitRepo - let tmp = gitAnnexTmpDir g - exists <- liftIO $ doesDirectoryExist tmp - if (not exists) + let dir = dirspec g + exists <- liftIO $ doesDirectoryExist dir + if not exists then return [] else do - contents <- liftIO $ getDirectoryContents tmp + contents <- liftIO $ getDirectoryContents dir files <- liftIO $ filterM doesFileExist $ - map (tmp </>) contents + map (dir </>) contents return $ catMaybes $ map (fileKey . takeFileName) files diff --git a/Locations.hs b/Locations.hs index f263ea526..1c4f8296e 100644 --- a/Locations.hs +++ b/Locations.hs @@ -17,6 +17,7 @@ module Locations ( gitAnnexTmpDir, gitAnnexTmpLocation, gitAnnexBadDir, + gitAnnexBadLocation, gitAnnexUnusedLog, isLinkToAnnex, logFile, @@ -105,9 +106,13 @@ gitAnnexTmpLocation r key = gitAnnexTmpDir r </> keyFile key gitAnnexBadDir :: Git.Repo -> FilePath gitAnnexBadDir r = addTrailingPathSeparator $ gitAnnexDir r </> "bad" -{- .git/annex/unused is used to number possibly unused keys -} -gitAnnexUnusedLog :: Git.Repo -> FilePath -gitAnnexUnusedLog r = gitAnnexDir r </> "unused" +{- The bad file to use for a given key. -} +gitAnnexBadLocation :: Git.Repo -> Key -> FilePath +gitAnnexBadLocation r key = gitAnnexBadDir r </> keyFile key + +{- .git/annex/*unused is used to number possibly unused keys -} +gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath +gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused") {- Checks a symlink target to see if it appears to point to annexed content. -} isLinkToAnnex :: FilePath -> Bool diff --git a/debian/changelog b/debian/changelog index 92c05a5a6..813816079 100644 --- a/debian/changelog +++ b/debian/changelog @@ -3,6 +3,7 @@ git-annex (0.20110428) UNRELEASED; urgency=low * Fix hasKeyCheap setting for bup and rsync special remotes. * Add hook special remotes. * Avoid crashing when an existing key is readded to the annex. + * unused: Now also lists files fsck places in .git/annex/bad/ -- Joey Hess <joeyh@debian.org> Thu, 28 Apr 2011 14:38:16 -0400 diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 3e91e7ad9..450b95a0d 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -158,7 +158,7 @@ Many git-annex commands will stage changes for later `git commit` by you. Checks the annex for data that does not correspond to any files currently in the respository, and prints a numbered list of the data. - To only show unused temp files, specify --fast + To only show unused temp and bad files, specify --fast To check data on a remote that does not correspond to any files currently in the local repository, specify --from. |