summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-04-29 13:59:00 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-04-29 13:59:00 -0400
commit43f0a666f0f6cc152a2b778921831d6d7daedcaf (patch)
treebd65e820843c23677131f29517064f543683d766
parent49efc6c39928baec03d7dd0d5cb37f346432f1d3 (diff)
unused: Now also lists files fsck places in .git/annex/bad/
-rw-r--r--Command/DropUnused.hs76
-rw-r--r--Command/Unused.hs100
-rw-r--r--Locations.hs11
-rw-r--r--debian/changelog1
-rw-r--r--doc/git-annex.mdwn2
5 files changed, 113 insertions, 77 deletions
diff --git a/Command/DropUnused.hs b/Command/DropUnused.hs
index 1eec68820..b129235e1 100644
--- a/Command/DropUnused.hs
+++ b/Command/DropUnused.hs
@@ -21,54 +21,66 @@ import qualified Annex
import qualified Command.Drop
import qualified Command.Move
import qualified Remote
+import qualified GitRepo as Git
import Backend
import Key
+type UnusedMap = M.Map String Key
+
command :: [Command]
command = [repoCommand "dropunused" (paramRepeating paramNumber) seek
"drop unused file content"]
seek :: [CommandSeek]
-seek = [withUnusedMap]
+seek = [withUnusedMaps]
-{- Read unusedlog once, and pass the map to each start action. -}
-withUnusedMap :: CommandSeek
-withUnusedMap params = do
- m <- readUnusedLog
- return $ map (start m) params
+{- Read unused logs once, and pass the maps to each start action. -}
+withUnusedMaps :: CommandSeek
+withUnusedMaps params = do
+ unused <- readUnusedLog ""
+ unusedbad <- readUnusedLog "bad"
+ unusedtmp <- readUnusedLog "tmp"
+ return $ map (start (unused, unusedbad, unusedtmp)) params
-start :: M.Map String Key -> CommandStartString
-start m s = notBareRepo $ do
- case M.lookup s m of
- Nothing -> return Nothing
- Just key -> do
- showStart "dropunused" s
- from <- Annex.getState Annex.fromremote
- case from of
- Just name -> do
- r <- Remote.byName name
- return $ Just $ performRemote r key
- _ -> return $ Just $ perform key
+start :: (UnusedMap, UnusedMap, UnusedMap) -> CommandStartString
+start (unused, unusedbad, unusedtmp) s = notBareRepo $ search
+ [ (unused, perform)
+ , (unusedbad, performOther gitAnnexBadLocation)
+ , (unusedtmp, performOther gitAnnexTmpLocation)
+ ]
+ where
+ search [] = return Nothing
+ search ((m, a):rest) = do
+ case M.lookup s m of
+ Nothing -> search rest
+ Just key -> do
+ showStart "dropunused" s
+ return $ Just $ a key
-{- drop both content in the backend and any tmp file for the key -}
perform :: Key -> CommandPerform
perform key = do
- g <- Annex.gitRepo
- let tmp = gitAnnexTmpLocation g key
- tmp_exists <- liftIO $ doesFileExist tmp
- when tmp_exists $ liftIO $ removeFile tmp
- backend <- keyBackend key
- Command.Drop.perform key backend (Just 0) -- force drop
+ from <- Annex.getState Annex.fromremote
+ case from of
+ Just name -> do
+ r <- Remote.byName name
+ showNote $ "from " ++ Remote.name r ++ "..."
+ return $ Just $ Command.Move.fromCleanup r True key
+ _ -> do
+ backend <- keyBackend key
+ Command.Drop.perform key backend (Just 0) -- force drop
-performRemote :: Remote.Remote Annex -> Key -> CommandPerform
-performRemote r key = do
- showNote $ "from " ++ Remote.name r ++ "..."
- return $ Just $ Command.Move.fromCleanup r True key
+performOther :: (Git.Repo -> Key -> FilePath) -> Key -> CommandPerform
+performOther filespec key = do
+ g <- Annex.gitRepo
+ let f = filespec g key
+ e <- liftIO $ doesFileExist f
+ when e $ liftIO $ removeFile f
+ return $ Just $ return True
-readUnusedLog :: Annex (M.Map String Key)
-readUnusedLog = do
+readUnusedLog :: FilePath -> Annex UnusedMap
+readUnusedLog prefix = do
g <- Annex.gitRepo
- let f = gitAnnexUnusedLog g
+ let f = gitAnnexUnusedLog prefix g
e <- liftIO $ doesFileExist f
if e
then do
diff --git a/Command/Unused.hs b/Command/Unused.hs
index a3fb6fe23..67f10581d 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -7,7 +7,7 @@
module Command.Unused where
-import Control.Monad (filterM, unless, forM_)
+import Control.Monad (filterM, unless, forM_, when)
import Control.Monad.State (liftIO)
import qualified Data.Set as S
import Data.Maybe
@@ -51,14 +51,17 @@ perform = do
checkUnused :: Annex ()
checkUnused = do
- (unused, staletmp) <- unusedKeys
- let unusedlist = number 0 unused
- let staletmplist = number (length unused) staletmp
- let list = unusedlist ++ staletmplist
- writeUnusedFile list
- unless (null unused) $ showLongNote $ unusedMsg unusedlist
- unless (null staletmp) $ showLongNote $ staleTmpMsg staletmplist
- unless (null list) $ showLongNote $ "\n"
+ (unused, stalebad, staletmp) <- unusedKeys
+ n <- list "" unusedMsg unused 0
+ n' <- list "bad" staleBadMsg stalebad n
+ _ <- list "tmp" staleTmpMsg staletmp n'
+ return ()
+ where
+ list file msg l c = do
+ let unusedlist = number c l
+ when (not $ null l) $ showLongNote $ msg unusedlist
+ writeUnusedFile file unusedlist
+ return $ length l
checkRemoteUnused :: Remote.Remote Annex -> Annex ()
checkRemoteUnused r = do
@@ -69,7 +72,7 @@ checkRemoteUnused r = do
remotehas <- filterM isthere logged
let remoteunused = remotehas `exclude` referenced
let list = number 0 remoteunused
- writeUnusedFile list
+ writeUnusedFile "" list
unless (null remoteunused) $ do
showLongNote $ remoteUnusedMsg r list
showLongNote $ "\n"
@@ -80,10 +83,10 @@ checkRemoteUnused r = do
return $ uuid `elem` us
uuid = Remote.uuid r
-writeUnusedFile :: [(Int, Key)] -> Annex ()
-writeUnusedFile l = do
+writeUnusedFile :: FilePath -> [(Int, Key)] -> Annex ()
+writeUnusedFile prefix l = do
g <- Annex.gitRepo
- liftIO $ safeWriteFile (gitAnnexUnusedLog g) $
+ liftIO $ safeWriteFile (gitAnnexUnusedLog prefix g) $
unlines $ map (\(n, k) -> show n ++ " " ++ show k) l
table :: [(Int, Key)] -> [String]
@@ -100,7 +103,12 @@ staleTmpMsg :: [(Int, Key)] -> String
staleTmpMsg t = unlines $
["Some partially transferred data exists in temporary files:"]
++ table t ++ [dropMsg Nothing]
-
+
+staleBadMsg :: [(Int, Key)] -> String
+staleBadMsg t = unlines $
+ ["Some corrupted files have been preserved by fsck, just in case:"]
+ ++ table t ++ [dropMsg Nothing]
+
unusedMsg :: [(Int, Key)] -> String
unusedMsg u = unusedMsg' u
["Some annexed data is no longer used by any files in the repository:"]
@@ -127,36 +135,28 @@ dropMsg :: Maybe (Remote.Remote Annex) -> String
dropMsg Nothing = dropMsg' ""
dropMsg (Just r) = dropMsg' $ " --from " ++ Remote.name r
dropMsg' :: String -> String
-dropMsg' s = "(To remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER)"
+dropMsg' s = "(To remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER)\n"
{- Finds keys whose content is present, but that do not seem to be used
- - by any files in the git repo, or that are only present as tmp files. -}
-unusedKeys :: Annex ([Key], [Key])
+ - by any files in the git repo, or that are only present as bad or tmp
+ - files. -}
+unusedKeys :: Annex ([Key], [Key], [Key])
unusedKeys = do
- g <- Annex.gitRepo
-
fast <- Annex.getState Annex.fast
if fast
then do
- showNote "fast mode enabled; only finding temporary files"
- tmps <- tmpKeys
- return ([], tmps)
+ showNote "fast mode enabled; only finding stale files"
+ tmp <- staleKeys' gitAnnexTmpDir
+ bad <- staleKeys' gitAnnexBadDir
+ return ([], bad, tmp)
else do
showNote "checking for unused data..."
present <- getKeysPresent
referenced <- getKeysReferenced
- tmps <- tmpKeys
-
let unused = present `exclude` referenced
- let staletmp = tmps `exclude` present
- let duptmp = tmps `exclude` staletmp
-
- -- Tmp files that are dups of content already present
- -- can simply be removed.
- liftIO $ forM_ duptmp $ \t -> removeFile $
- gitAnnexTmpLocation g t
-
- return (unused, staletmp)
+ staletmp <- staleKeys gitAnnexTmpDir present
+ stalebad <- staleKeys gitAnnexBadDir present
+ return (unused, stalebad, staletmp)
{- Finds items in the first, smaller list, that are not
- present in the second, larger list.
@@ -178,16 +178,34 @@ getKeysReferenced = do
keypairs <- mapM Backend.lookupFile files
return $ map fst $ catMaybes keypairs
-{- List of keys that have temp files in the git repo. -}
-tmpKeys :: Annex [Key]
-tmpKeys = do
+{- Looks in the specified directory for bad/tmp keys, and returns a list
+ - of those that might still have value, or might be stale and removable.
+ -
+ - When a list of presently available keys is provided, stale keys
+ - that no longer have value are deleted.
+ -}
+staleKeys :: (Git.Repo -> FilePath) -> [Key] -> Annex [Key]
+staleKeys dirspec present = do
+ contents <- staleKeys' dirspec
+
+ let stale = contents `exclude` present
+ let dup = contents `exclude` stale
+
+ g <- Annex.gitRepo
+ let dir = dirspec g
+ liftIO $ forM_ dup $ \t -> removeFile $ dir </> keyFile t
+
+ return stale
+
+staleKeys' :: (Git.Repo -> FilePath) -> Annex [Key]
+staleKeys' dirspec = do
g <- Annex.gitRepo
- let tmp = gitAnnexTmpDir g
- exists <- liftIO $ doesDirectoryExist tmp
- if (not exists)
+ let dir = dirspec g
+ exists <- liftIO $ doesDirectoryExist dir
+ if not exists
then return []
else do
- contents <- liftIO $ getDirectoryContents tmp
+ contents <- liftIO $ getDirectoryContents dir
files <- liftIO $ filterM doesFileExist $
- map (tmp </>) contents
+ map (dir </>) contents
return $ catMaybes $ map (fileKey . takeFileName) files
diff --git a/Locations.hs b/Locations.hs
index f263ea526..1c4f8296e 100644
--- a/Locations.hs
+++ b/Locations.hs
@@ -17,6 +17,7 @@ module Locations (
gitAnnexTmpDir,
gitAnnexTmpLocation,
gitAnnexBadDir,
+ gitAnnexBadLocation,
gitAnnexUnusedLog,
isLinkToAnnex,
logFile,
@@ -105,9 +106,13 @@ gitAnnexTmpLocation r key = gitAnnexTmpDir r </> keyFile key
gitAnnexBadDir :: Git.Repo -> FilePath
gitAnnexBadDir r = addTrailingPathSeparator $ gitAnnexDir r </> "bad"
-{- .git/annex/unused is used to number possibly unused keys -}
-gitAnnexUnusedLog :: Git.Repo -> FilePath
-gitAnnexUnusedLog r = gitAnnexDir r </> "unused"
+{- The bad file to use for a given key. -}
+gitAnnexBadLocation :: Git.Repo -> Key -> FilePath
+gitAnnexBadLocation r key = gitAnnexBadDir r </> keyFile key
+
+{- .git/annex/*unused is used to number possibly unused keys -}
+gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath
+gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused")
{- Checks a symlink target to see if it appears to point to annexed content. -}
isLinkToAnnex :: FilePath -> Bool
diff --git a/debian/changelog b/debian/changelog
index 92c05a5a6..813816079 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,7 @@ git-annex (0.20110428) UNRELEASED; urgency=low
* Fix hasKeyCheap setting for bup and rsync special remotes.
* Add hook special remotes.
* Avoid crashing when an existing key is readded to the annex.
+ * unused: Now also lists files fsck places in .git/annex/bad/
-- Joey Hess <joeyh@debian.org> Thu, 28 Apr 2011 14:38:16 -0400
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 3e91e7ad9..450b95a0d 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -158,7 +158,7 @@ Many git-annex commands will stage changes for later `git commit` by you.
Checks the annex for data that does not correspond to any files currently
in the respository, and prints a numbered list of the data.
- To only show unused temp files, specify --fast
+ To only show unused temp and bad files, specify --fast
To check data on a remote that does not correspond to any files currently
in the local repository, specify --from.