diff options
author | Joey Hess <joey@kitenet.net> | 2014-03-12 15:18:43 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2014-03-12 15:18:43 -0400 |
commit | 3a7b7c8ac97f263e5fdf6281ef6812aba4af0042 (patch) | |
tree | 42be53b2ff001d4c8b3471aea090c68de33de879 | |
parent | 4d79d2327819111f97954e10dce2c8ce53b0ab31 (diff) |
fully fix fsck memory use by iterative fscking
Not very well tested, but I'm sure it doesn't eg, loop forever.
-rw-r--r-- | Git/Fsck.hs | 26 | ||||
-rw-r--r-- | Git/Repair.hs | 101 | ||||
-rw-r--r-- | Logs/FsckResults.hs | 28 | ||||
-rw-r--r-- | doc/bugs/enormous_fsck_output_OOM.mdwn | 10 |
4 files changed, 108 insertions, 57 deletions
diff --git a/Git/Fsck.hs b/Git/Fsck.hs index e8fa02129..80f76dd90 100644 --- a/Git/Fsck.hs +++ b/Git/Fsck.hs @@ -28,7 +28,12 @@ import Control.Concurrent.Async type MissingObjects = S.Set Sha -data FsckResults = FsckFoundMissing MissingObjects | FsckFailed +data FsckResults + = FsckFoundMissing + { missingObjects :: MissingObjects + , missingObjectsTruncated :: Bool + } + | FsckFailed deriving (Show) {- Runs fsck to find some of the broken objects in the repository. @@ -55,22 +60,25 @@ findBroken batchmode r = do , std_err = CreatePipe } (bad1, bad2) <- concurrently - (readMissingObjs r supportsNoDangling (stdoutHandle p)) - (readMissingObjs r supportsNoDangling (stderrHandle p)) + (readMissingObjs maxobjs r supportsNoDangling (stdoutHandle p)) + (readMissingObjs maxobjs r supportsNoDangling (stderrHandle p)) fsckok <- checkSuccessProcess pid + let truncated = S.size bad1 == maxobjs || S.size bad1 == maxobjs let badobjs = S.union bad1 bad2 if S.null badobjs && not fsckok then return FsckFailed - else return $ FsckFoundMissing badobjs + else return $ FsckFoundMissing badobjs truncated + where + maxobjs = 10000 foundBroken :: FsckResults -> Bool foundBroken FsckFailed = True -foundBroken (FsckFoundMissing s) = not (S.null s) +foundBroken (FsckFoundMissing s _) = not (S.null s) knownMissing :: FsckResults -> MissingObjects knownMissing FsckFailed = S.empty -knownMissing (FsckFoundMissing s) = s +knownMissing (FsckFoundMissing s _) = s {- Finds objects that are missing from the git repsitory, or are corrupt. - @@ -80,9 +88,9 @@ knownMissing (FsckFoundMissing s) = s findMissing :: [Sha] -> Repo -> IO MissingObjects findMissing objs r = S.fromList <$> filterM (`isMissing` r) objs -readMissingObjs :: Repo -> Bool -> Handle -> IO MissingObjects -readMissingObjs r supportsNoDangling h = do - objs <- findShas supportsNoDangling <$> hGetContents h +readMissingObjs :: Int -> Repo -> Bool -> Handle -> IO MissingObjects +readMissingObjs maxobjs r supportsNoDangling h = do + objs <- take maxobjs . findShas supportsNoDangling <$> hGetContents h findMissing objs r isMissing :: Sha -> Repo -> IO Bool diff --git a/Git/Repair.hs b/Git/Repair.hs index 0f31854b7..67ded359f 100644 --- a/Git/Repair.hs +++ b/Git/Repair.hs @@ -1,7 +1,6 @@ {- git repository recovery -import qualified Data.Set as S - - - Copyright 2013 Joey Hess <joey@kitenet.net> + - Copyright 2013-2014 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} @@ -115,7 +114,9 @@ retrieveMissingObjects missing referencerepo r void $ copyObjects tmpr r case stillmissing of FsckFailed -> return $ FsckFailed - FsckFoundMissing s -> FsckFoundMissing <$> findMissing (S.toList s) r + FsckFoundMissing s t -> FsckFoundMissing + <$> findMissing (S.toList s) r + <*> pure t , return stillmissing ) pullremotes tmpr (rmt:rmts) fetchrefs ms @@ -128,9 +129,9 @@ retrieveMissingObjects missing referencerepo r void $ copyObjects tmpr r case ms of FsckFailed -> pullremotes tmpr rmts fetchrefs ms - FsckFoundMissing s -> do + FsckFoundMissing s t -> do stillmissing <- findMissing (S.toList s) r - pullremotes tmpr rmts fetchrefs (FsckFoundMissing stillmissing) + pullremotes tmpr rmts fetchrefs (FsckFoundMissing stillmissing t) , pullremotes tmpr rmts fetchrefs ms ) fetchfrom fetchurl ps = runBool $ @@ -278,7 +279,7 @@ findUncorruptedCommit missing goodcommits branch r = do then return (Just c, gcs') else findfirst gcs' cs -{- Verifies tha none of the missing objects in the set are used by +{- Verifies that none of the missing objects in the set are used by - the commit. Also adds to a set of commit shas that have been verified to - be good, which can be passed into subsequent calls to avoid - redundant work when eg, chasing down branches to find the first @@ -452,7 +453,7 @@ runRepair' removablebranch fsckresult forced referencerepo g = do missing <- findBroken False g stillmissing <- retrieveMissingObjects missing referencerepo g case stillmissing of - FsckFoundMissing s + FsckFoundMissing s t | S.null s -> if repoIsLocalBare g then successfulfinish [] else ifM (checkIndex g) @@ -465,7 +466,7 @@ runRepair' removablebranch fsckresult forced referencerepo g = do ) | otherwise -> if forced then ifM (checkIndex g) - ( continuerepairs s + ( forcerepair s t , corruptedindex ) else do @@ -478,17 +479,16 @@ runRepair' removablebranch fsckresult forced referencerepo g = do | forced -> ifM (pure (repoIsLocalBare g) <||> checkIndex g) ( do cleanCorruptObjects FsckFailed g - missing' <- findBroken False g - case missing' of + stillmissing' <- findBroken False g + case stillmissing' of FsckFailed -> return (False, []) - FsckFoundMissing stillmissing' -> - continuerepairs stillmissing' + FsckFoundMissing s t -> forcerepair s t , corruptedindex ) | otherwise -> unsuccessfulfinish where - continuerepairs stillmissing = do - (removedbranches, goodcommits) <- removeBadBranches removablebranch stillmissing emptyGoodCommits g + repairbranches missing = do + (removedbranches, goodcommits) <- removeBadBranches removablebranch missing emptyGoodCommits g let remotebranches = filter isTrackingBranch removedbranches unless (null remotebranches) $ putStrLn $ unwords @@ -496,32 +496,43 @@ runRepair' removablebranch fsckresult forced referencerepo g = do , show (length remotebranches) , "remote tracking branches that referred to missing objects." ] - (resetbranches, deletedbranches, _) <- resetLocalBranches stillmissing goodcommits g + (resetbranches, deletedbranches, _) <- resetLocalBranches missing goodcommits g displayList (map fromRef resetbranches) "Reset these local branches to old versions before the missing objects were committed:" displayList (map fromRef deletedbranches) "Deleted these local branches, which could not be recovered due to missing objects:" + return (resetbranches ++ deletedbranches) + + forcerepair missing fscktruncated = do + modifiedbranches <- repairbranches missing deindexedfiles <- rewriteIndex g displayList deindexedfiles "Removed these missing files from the index. You should look at what files are present in your working tree and git add them back to the index when appropriate." - let modifiedbranches = resetbranches ++ deletedbranches - if null resetbranches && null deletedbranches - then successfulfinish modifiedbranches - else do - unless (repoIsLocalBare g) $ do - mcurr <- Branch.currentUnsafe g - case mcurr of - Nothing -> return () - Just curr -> when (any (== curr) modifiedbranches) $ do + + -- When the fsck results were truncated, try + -- fscking again, and as long as different + -- missing objects are found, continue + -- the repair process. + if fscktruncated + then do + fsckresult' <- findBroken False g + case fsckresult' of + FsckFailed -> do + putStrLn "git fsck is failing" + return (False, modifiedbranches) + FsckFoundMissing s _ + | S.null s -> successfulfinish modifiedbranches + | S.null (s `S.difference` missing) -> do putStrLn $ unwords - [ "You currently have" - , fromRef curr - , "checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch!" + [ show (S.size s) + , "missing objects could not be recovered!" ] - putStrLn "Successfully recovered repository!" - putStrLn "Please carefully check that the changes mentioned above are ok.." - return (True, modifiedbranches) - + return (False, modifiedbranches) + | otherwise -> do + (ok, modifiedbranches') <- runRepairOf fsckresult' removablebranch forced referencerepo g + return (ok, modifiedbranches++modifiedbranches') + else successfulfinish modifiedbranches + corruptedindex = do nukeFile (indexFile g) -- The corrupted index can prevent fsck from finding other @@ -531,12 +542,28 @@ runRepair' removablebranch fsckresult forced referencerepo g = do putStrLn "Removed the corrupted index file. You should look at what files are present in your working tree and git add them back to the index when appropriate." return result - successfulfinish modifiedbranches = do - mapM_ putStrLn - [ "Successfully recovered repository!" - , "You should run \"git fsck\" to make sure, but it looks like everything was recovered ok." - ] - return (True, modifiedbranches) + successfulfinish modifiedbranches + | null modifiedbranches = do + mapM_ putStrLn + [ "Successfully recovered repository!" + , "You should run \"git fsck\" to make sure, but it looks like everything was recovered ok." + ] + return (True, modifiedbranches) + | otherwise = do + unless (repoIsLocalBare g) $ do + mcurr <- Branch.currentUnsafe g + case mcurr of + Nothing -> return () + Just curr -> when (any (== curr) modifiedbranches) $ do + putStrLn $ unwords + [ "You currently have" + , fromRef curr + , "checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch!" + ] + putStrLn "Successfully recovered repository!" + putStrLn "Please carefully check that the changes mentioned above are ok.." + return (True, modifiedbranches) + unsuccessfulfinish = do if repoIsLocalBare g then do diff --git a/Logs/FsckResults.hs b/Logs/FsckResults.hs index 3538bdc40..619dd586c 100644 --- a/Logs/FsckResults.hs +++ b/Logs/FsckResults.hs @@ -23,25 +23,31 @@ writeFsckResults u fsckresults = do logfile <- fromRepo $ gitAnnexFsckResultsLog u liftIO $ case fsckresults of - FsckFailed -> store S.empty logfile - FsckFoundMissing s + FsckFailed -> store S.empty False logfile + FsckFoundMissing s t | S.null s -> nukeFile logfile - | otherwise -> store s logfile + | otherwise -> store s t logfile where - store s logfile = do + store s t logfile = do createDirectoryIfMissing True (parentDir logfile) - liftIO $ viaTmp writeFile logfile $ serialize s - serialize = unlines . map fromRef . S.toList + liftIO $ viaTmp writeFile logfile $ serialize s t + serialize s t = + let ls = map fromRef (S.toList s) + in if t + then unlines ("truncated":ls) + else unlines ls readFsckResults :: UUID -> Annex FsckResults readFsckResults u = do logfile <- fromRepo $ gitAnnexFsckResultsLog u - liftIO $ catchDefaultIO (FsckFoundMissing S.empty) $ - deserialize <$> readFile logfile + liftIO $ catchDefaultIO (FsckFoundMissing S.empty False) $ + deserialize . lines <$> readFile logfile where - deserialize l = - let s = S.fromList $ map Ref $ lines l - in if S.null s then FsckFailed else FsckFoundMissing s + deserialize ("truncated":ls) = deserialize' ls True + deserialize ls = deserialize' ls False + deserialize' ls t = + let s = S.fromList $ map Ref ls + in if S.null s then FsckFailed else FsckFoundMissing s t clearFsckResults :: UUID -> Annex () clearFsckResults = liftIO . nukeFile <=< fromRepo . gitAnnexFsckResultsLog diff --git a/doc/bugs/enormous_fsck_output_OOM.mdwn b/doc/bugs/enormous_fsck_output_OOM.mdwn index 975674b5c..b06655354 100644 --- a/doc/bugs/enormous_fsck_output_OOM.mdwn +++ b/doc/bugs/enormous_fsck_output_OOM.mdwn @@ -18,3 +18,13 @@ So I tried to follow your advice here and increase the stack: git-annex: Most RTS options are disabled. Link with -rtsopts to enable them. I wasn't sure what to do next, so any help would be appreciated. + +> Now only 20k problem shas max (more likely 10k) are collected from fsck, +> so it won't use much memory (60 mb or so). If it had to truncate +> shas from fsck, it will re-run fsck after the repair process, +> which should either find no problems left (common when eg when all missing shas +> were able to be fetched from remotes), or find a new set of problem +> shas, which it can feed back through the repair process. +> +> If the repository is very large, this means more work, but it shouldn't +> run out of memory now. [[fixed|done]] --[[Joey]] |