diff options
-rw-r--r-- | CmdLine.hs | 10 | ||||
-rw-r--r-- | Command.hs | 2 | ||||
-rw-r--r-- | Command/DropUnused.hs | 7 | ||||
-rw-r--r-- | Command/Init.hs | 5 | ||||
-rw-r--r-- | Command/Uninit.hs | 2 | ||||
-rw-r--r-- | Command/Unused.hs | 96 | ||||
-rw-r--r-- | Content.hs | 2 | ||||
-rw-r--r-- | GitRepo.hs | 4 | ||||
-rw-r--r-- | LocationLog.hs | 8 | ||||
-rw-r--r-- | Locations.hs | 7 | ||||
-rw-r--r-- | debian/changelog | 4 | ||||
-rw-r--r-- | doc/bugs/tmp_file_handling.mdwn | 2 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 2 | ||||
-rw-r--r-- | git-annex-shell.hs | 2 | ||||
-rw-r--r-- | test.hs | 2 |
15 files changed, 99 insertions, 56 deletions
diff --git a/CmdLine.hs b/CmdLine.hs index 68d1e0dd3..82a21d0fc 100644 --- a/CmdLine.hs +++ b/CmdLine.hs @@ -105,13 +105,3 @@ shutdown errnum = do unless (q == GitQueue.empty) $ do showSideAction "Recording state in git..." Annex.queueRun - - -- If nothing failed, clean up any files left in the temp directory, - -- but leave the directory itself. If something failed, temp files - -- are left behind to allow resuming on re-run. - when (errnum == 0) $ do - g <- Annex.gitRepo - let tmp = gitAnnexTmpDir g - exists <- liftIO $ doesDirectoryExist tmp - when exists $ liftIO $ removeDirectoryRecursive tmp - liftIO $ createDirectoryIfMissing True tmp diff --git a/Command.hs b/Command.hs index bebbf3f1b..298234fa0 100644 --- a/Command.hs +++ b/Command.hs @@ -187,7 +187,7 @@ filterFiles l = do let regexp = compile (toregex exclude) [] return $ filter (notExcluded regexp) l' where - notState f = not $ isPrefixOf stateDir f + notState f = not $ stateDir `isPrefixOf` f notExcluded r f = case match r f [] of Nothing -> True Just _ -> False diff --git a/Command/DropUnused.hs b/Command/DropUnused.hs index 9427f8103..63216ce4f 100644 --- a/Command/DropUnused.hs +++ b/Command/DropUnused.hs @@ -7,6 +7,7 @@ module Command.DropUnused where +import Control.Monad (when) import Control.Monad.State (liftIO) import qualified Data.Map as M import System.Directory @@ -33,8 +34,14 @@ start s = do case M.lookup s m of Nothing -> return Nothing Just key -> do + g <- Annex.gitRepo showStart "dropunused" s backend <- keyBackend key + -- drop both content in the backend and any tmp + -- file for the key + let tmp = gitAnnexTmpLocation g key + tmp_exists <- liftIO $ doesFileExist tmp + when tmp_exists $ liftIO $ removeFile tmp return $ Just $ Command.Drop.perform key backend (Just 0) readUnusedLog :: Annex (M.Map String Key) diff --git a/Command/Init.hs b/Command/Init.hs index e780c8863..2976b988d 100644 --- a/Command/Init.hs +++ b/Command/Init.hs @@ -20,6 +20,7 @@ import Version import Messages import Locations import Types +import Utility command :: [Command] command = [Command "init" paramDesc seek @@ -61,7 +62,7 @@ gitAttributesWrite repo = do exists <- doesFileExist attributes if not exists then do - writeFile attributes $ attrLine ++ "\n" + safeWriteFile attributes $ attrLine ++ "\n" commit else do content <- readFile attributes @@ -85,7 +86,7 @@ gitPreCommitHookWrite repo = do if exists then warning $ "pre-commit hook (" ++ hook ++ ") already exists, not configuring" else liftIO $ do - writeFile hook preCommitScript + safeWriteFile hook preCommitScript p <- getPermissions hook setPermissions hook $ p {executable = True} where diff --git a/Command/Uninit.hs b/Command/Uninit.hs index 93465df37..e9406ce3a 100644 --- a/Command/Uninit.hs +++ b/Command/Uninit.hs @@ -59,5 +59,5 @@ gitAttributesUnWrite repo = do attrexists <- doesFileExist attributes when attrexists $ do c <- readFileStrict attributes - writeFile attributes $ unlines $ + safeWriteFile attributes $ unlines $ filter (/= Command.Init.attrLine) $ lines c diff --git a/Command/Unused.hs b/Command/Unused.hs index 5e5698e38..4ffa6a17f 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -7,15 +7,19 @@ module Command.Unused where +import Control.Monad (filterM, unless) import Control.Monad.State (liftIO) import qualified Data.Map as M import Data.Maybe +import System.FilePath +import System.Directory import Command import Types import Content import Messages import Locations +import Utility import qualified Annex import qualified GitRepo as Git import qualified Backend @@ -40,49 +44,71 @@ perform = do checkUnused :: Annex Bool checkUnused = do showNote "checking for unused data..." - unused <- unusedKeys - let list = number 1 unused + (unused, staletmp) <- unusedKeys + let unusedlist = number 0 unused + let staletmplist = number (length unused) staletmp + let list = unusedlist ++ staletmplist g <- Annex.gitRepo - liftIO $ writeFile (gitAnnexUnusedLog g) $ unlines $ + liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ unlines $ map (\(n, k) -> show n ++ " " ++ show k) list - if null unused - then return True - else do - showLongNote $ w list - return False + unless (null unused) $ + showLongNote $ unusedmsg unusedlist + unless (null staletmp) $ + showLongNote $ staletmpmsg staletmplist + unless (null list) $ + showLongNote $ "\n" + return $ null list + where - w u = unlines $ - ["Some annexed data is no longer pointed to by any files in the repository:", - " NUMBER KEY"] - ++ map cols u ++ - ["(To see where data was previously used, try: git log --stat -S'KEY')", - "(To remove unwanted data: git-annex dropunused NUMBER)", - ""] + unusedmsg u = unlines $ + ["Some annexed data is no longer pointed to by any files in the repository:"] + ++ table u ++ + ["(To see where data was previously used, try: git log --stat -S'KEY')"] ++ + dropmsg + staletmpmsg t = unlines $ + ["Some partially transferred data exists in temporary files:"] + ++ table t ++ dropmsg + dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"] + + table l = [" NUMBER KEY"] ++ map cols l cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k pad n s = s ++ replicate (n - length s) ' ' -number :: Integer -> [a] -> [(Integer, a)] +number :: Int -> [a] -> [(Int, a)] number _ [] = [] -number n (x:xs) = (n, x):(number (n+1) xs) +number n (x:xs) = (n+1, x):(number (n+1) xs) {- Finds keys whose content is present, but that do not seem to be used - - by any files in the git repo. -} -unusedKeys :: Annex [Key] + - by any files in the git repo, or that are only present as tmp files. -} +unusedKeys :: Annex ([Key], [Key]) unusedKeys = do + g <- Annex.gitRepo present <- getKeysPresent referenced <- getKeysReferenced - -- Constructing a single map, of the set that tends to be smaller, - -- appears more efficient in both memory and CPU than constructing - -- and taking the M.difference of two maps. - let present_m = existsMap present - let unused_m = remove referenced present_m - return $ M.keys unused_m + let unused = present `exclude` referenced + + -- Some tmp files may be dups copies of content that is fully present. + -- Simply delete those, while including the keys for the rest of + -- the temp files in the returned list for the user to deal with. + tmps <- tmpKeys + let staletmp = tmps `exclude` present + let duptmp = tmps `exclude` staletmp + _ <- liftIO $ mapM (\t -> removeFile $ gitAnnexTmpLocation g t) duptmp + + return (unused, staletmp) + where - remove a b = foldl (flip M.delete) b a + -- Constructing a single map, of the set that tends to be + -- smaller, appears more efficient in both memory and CPU + -- than constructing and taking the M.difference of two maps. + exclude [] _ = [] -- optimisation + exclude smaller larger = M.keys $ remove larger $ existsMap smaller + + existsMap :: Ord k => [k] -> M.Map k Int + existsMap l = M.fromList $ map (\k -> (k, 1)) l -existsMap :: Ord k => [k] -> M.Map k Int -existsMap l = M.fromList $ map (\k -> (k, 1)) l + remove a b = foldl (flip M.delete) b a {- List of keys referenced by symlinks in the git repo. -} getKeysReferenced :: Annex [Key] @@ -91,3 +117,17 @@ getKeysReferenced = do files <- liftIO $ Git.inRepo g [Git.workTree g] keypairs <- mapM Backend.lookupFile files return $ map fst $ catMaybes keypairs + +{- List of keys that have temp files in the git repo. -} +tmpKeys :: Annex [Key] +tmpKeys = do + g <- Annex.gitRepo + let tmp = gitAnnexTmpDir g + exists <- liftIO $ doesDirectoryExist tmp + if (not exists) + then return [] + else do + contents <- liftIO $ getDirectoryContents tmp + files <- liftIO $ filterM doesFileExist $ + map (tmp </>) contents + return $ map (fileKey . takeFileName) files diff --git a/Content.hs b/Content.hs index d0ed8d861..e16ad883c 100644 --- a/Content.hs +++ b/Content.hs @@ -67,7 +67,7 @@ logStatus key status = do getViaTmp :: Key -> (FilePath -> Annex Bool) -> Annex Bool getViaTmp key action = do g <- Annex.gitRepo - let tmp = gitAnnexTmpDir g </> keyFile key + let tmp = gitAnnexTmpLocation g key liftIO $ createDirectoryIfMissing True (parentDir tmp) success <- action tmp if success diff --git a/GitRepo.hs b/GitRepo.hs index ec363fe73..4e69544d4 100644 --- a/GitRepo.hs +++ b/GitRepo.hs @@ -323,7 +323,7 @@ configRemotes repo = map construct remotepairs | otherwise = repoFromPath v -- git remotes can be written scp style -- [user@]host:dir -- where dir is relative to the user's home directory. - scpstyle v = isInfixOf ":" v && (not $ isInfixOf "//" v) + scpstyle v = ":" `isInfixOf` v && (not $ "//" `isInfixOf` v) scptourl v = "ssh://" ++ host ++ slash dir where bits = split ":" v @@ -458,7 +458,7 @@ prop_idempotent_deencode s = s == decodeGitFile (encodeGitFile s) -} absDir :: String -> IO String absDir d - | isPrefixOf "/" d = expandt d + | "/" `isPrefixOf` d = expandt d | otherwise = do h <- myhomedir return $ h ++ d diff --git a/LocationLog.hs b/LocationLog.hs index 926939051..56953bc02 100644 --- a/LocationLog.hs +++ b/LocationLog.hs @@ -32,7 +32,6 @@ import Data.Time import System.Locale import qualified Data.Map as Map import System.Directory -import System.Posix.Process import Control.Monad (when) import qualified GitRepo as Git @@ -112,12 +111,7 @@ readLog file = do {- Writes a set of lines to a log file -} writeLog :: FilePath -> [LogLine] -> IO () -writeLog file ls = do - pid <- getProcessID - let tmpfile = file ++ ".tmp" ++ show pid - createDirectoryIfMissing True (parentDir file) - writeFile tmpfile $ unlines $ map show ls - renameFile tmpfile file +writeLog file ls = safeWriteFile file (unlines $ map show ls) {- Generates a new LogLine with the current date. -} logNow :: LogStatus -> UUID -> IO LogLine diff --git a/Locations.hs b/Locations.hs index b2624754e..d30ceb136 100644 --- a/Locations.hs +++ b/Locations.hs @@ -15,6 +15,7 @@ module Locations ( gitAnnexDir, gitAnnexObjectDir, gitAnnexTmpDir, + gitAnnexTmpLocation, gitAnnexBadDir, gitAnnexUnusedLog, isLinkToAnnex, @@ -83,6 +84,10 @@ gitAnnexObjectDir r = addTrailingPathSeparator $ Git.workTree r </> objectDir gitAnnexTmpDir :: Git.Repo -> FilePath gitAnnexTmpDir r = addTrailingPathSeparator $ gitAnnexDir r </> "tmp" +{- The temp file to use for a given key. -} +gitAnnexTmpLocation :: Git.Repo -> Key -> FilePath +gitAnnexTmpLocation r key = gitAnnexTmpDir r </> keyFile key + {- .git-annex/bad/ is used for bad files found during fsck -} gitAnnexBadDir :: Git.Repo -> FilePath gitAnnexBadDir r = addTrailingPathSeparator $ gitAnnexDir r </> "bad" @@ -93,7 +98,7 @@ gitAnnexUnusedLog r = gitAnnexDir r </> "unused" {- Checks a symlink target to see if it appears to point to annexed content. -} isLinkToAnnex :: FilePath -> Bool -isLinkToAnnex s = isInfixOf ("/" ++ objectDir) s +isLinkToAnnex s = ("/" ++ objectDir) `isInfixOf` s {- Converts a key into a filename fragment. - diff --git a/debian/changelog b/debian/changelog index 92b21173c..cf17e8a00 100644 --- a/debian/changelog +++ b/debian/changelog @@ -8,6 +8,10 @@ git-annex (0.19) UNRELEASED; urgency=low * fsck, drop: Take untrusted repositories into account. * Bugfix: Files were copied from trusted remotes first even if their annex.cost was higher than other remotes. + * Improved temp file handling. Transfers of content can now be resumed + from temp files later; the resume does not have to be the immediate + next git-annex run. + * unused: Include partially transferred content in the list. -- Joey Hess <joeyh@debian.org> Wed, 19 Jan 2011 18:07:51 -0400 diff --git a/doc/bugs/tmp_file_handling.mdwn b/doc/bugs/tmp_file_handling.mdwn index 3c6c2a597..9db932e57 100644 --- a/doc/bugs/tmp_file_handling.mdwn +++ b/doc/bugs/tmp_file_handling.mdwn @@ -9,3 +9,5 @@ This presents 2 problems: finished. --[[Joey]] + +[[done]] diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index d2cde35a0..83a286b0e 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -132,7 +132,7 @@ Many git-annex commands will stage changes for later `git commit` by you. * unused Checks the annex for data that is not used by any files currently - in the annex, and prints a numbered list of the data. + in the annex, and prints a numbered list of the data. * dropunused [number ...] diff --git a/git-annex-shell.hs b/git-annex-shell.hs index 78dd77790..fa2a7f606 100644 --- a/git-annex-shell.hs +++ b/git-annex-shell.hs @@ -51,7 +51,7 @@ main' c@(cmd:dir:params) main' c@(cmd:_) -- Handle the case of being the user's login shell. It will be passed -- a single string containing all the real parameters. - | isPrefixOf "git-annex-shell " cmd = main' $ drop 1 $ shellUnEscape cmd + | "git-annex-shell " `isPrefixOf` cmd = main' $ drop 1 $ shellUnEscape cmd | elem cmd builtins = failure | otherwise = external c @@ -88,7 +88,7 @@ test_init = "git-annex init" ~: TestCase $ innewrepo $ do e <- doesFileExist annexlog e @? (annexlog ++ " not created") c <- readFile annexlog - isInfixOf reponame c @? annexlog ++ " does not contain repo name" + reponame `isInfixOf` c @? annexlog ++ " does not contain repo name" where annexlog = ".git-annex/uuid.log" reponame = "test repo" |