summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-03-12 15:18:43 -0400
committerGravatar Joey Hess <joey@kitenet.net>2014-03-12 15:18:43 -0400
commit3a7b7c8ac97f263e5fdf6281ef6812aba4af0042 (patch)
tree42be53b2ff001d4c8b3471aea090c68de33de879
parent4d79d2327819111f97954e10dce2c8ce53b0ab31 (diff)
fully fix fsck memory use by iterative fscking
Not very well tested, but I'm sure it doesn't eg, loop forever.
-rw-r--r--Git/Fsck.hs26
-rw-r--r--Git/Repair.hs101
-rw-r--r--Logs/FsckResults.hs28
-rw-r--r--doc/bugs/enormous_fsck_output_OOM.mdwn10
4 files changed, 108 insertions, 57 deletions
diff --git a/Git/Fsck.hs b/Git/Fsck.hs
index e8fa02129..80f76dd90 100644
--- a/Git/Fsck.hs
+++ b/Git/Fsck.hs
@@ -28,7 +28,12 @@ import Control.Concurrent.Async
type MissingObjects = S.Set Sha
-data FsckResults = FsckFoundMissing MissingObjects | FsckFailed
+data FsckResults
+ = FsckFoundMissing
+ { missingObjects :: MissingObjects
+ , missingObjectsTruncated :: Bool
+ }
+ | FsckFailed
deriving (Show)
{- Runs fsck to find some of the broken objects in the repository.
@@ -55,22 +60,25 @@ findBroken batchmode r = do
, std_err = CreatePipe
}
(bad1, bad2) <- concurrently
- (readMissingObjs r supportsNoDangling (stdoutHandle p))
- (readMissingObjs r supportsNoDangling (stderrHandle p))
+ (readMissingObjs maxobjs r supportsNoDangling (stdoutHandle p))
+ (readMissingObjs maxobjs r supportsNoDangling (stderrHandle p))
fsckok <- checkSuccessProcess pid
+ let truncated = S.size bad1 == maxobjs || S.size bad1 == maxobjs
let badobjs = S.union bad1 bad2
if S.null badobjs && not fsckok
then return FsckFailed
- else return $ FsckFoundMissing badobjs
+ else return $ FsckFoundMissing badobjs truncated
+ where
+ maxobjs = 10000
foundBroken :: FsckResults -> Bool
foundBroken FsckFailed = True
-foundBroken (FsckFoundMissing s) = not (S.null s)
+foundBroken (FsckFoundMissing s _) = not (S.null s)
knownMissing :: FsckResults -> MissingObjects
knownMissing FsckFailed = S.empty
-knownMissing (FsckFoundMissing s) = s
+knownMissing (FsckFoundMissing s _) = s
{- Finds objects that are missing from the git repsitory, or are corrupt.
-
@@ -80,9 +88,9 @@ knownMissing (FsckFoundMissing s) = s
findMissing :: [Sha] -> Repo -> IO MissingObjects
findMissing objs r = S.fromList <$> filterM (`isMissing` r) objs
-readMissingObjs :: Repo -> Bool -> Handle -> IO MissingObjects
-readMissingObjs r supportsNoDangling h = do
- objs <- findShas supportsNoDangling <$> hGetContents h
+readMissingObjs :: Int -> Repo -> Bool -> Handle -> IO MissingObjects
+readMissingObjs maxobjs r supportsNoDangling h = do
+ objs <- take maxobjs . findShas supportsNoDangling <$> hGetContents h
findMissing objs r
isMissing :: Sha -> Repo -> IO Bool
diff --git a/Git/Repair.hs b/Git/Repair.hs
index 0f31854b7..67ded359f 100644
--- a/Git/Repair.hs
+++ b/Git/Repair.hs
@@ -1,7 +1,6 @@
{- git repository recovery
-import qualified Data.Set as S
-
- - Copyright 2013 Joey Hess <joey@kitenet.net>
+ - Copyright 2013-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -115,7 +114,9 @@ retrieveMissingObjects missing referencerepo r
void $ copyObjects tmpr r
case stillmissing of
FsckFailed -> return $ FsckFailed
- FsckFoundMissing s -> FsckFoundMissing <$> findMissing (S.toList s) r
+ FsckFoundMissing s t -> FsckFoundMissing
+ <$> findMissing (S.toList s) r
+ <*> pure t
, return stillmissing
)
pullremotes tmpr (rmt:rmts) fetchrefs ms
@@ -128,9 +129,9 @@ retrieveMissingObjects missing referencerepo r
void $ copyObjects tmpr r
case ms of
FsckFailed -> pullremotes tmpr rmts fetchrefs ms
- FsckFoundMissing s -> do
+ FsckFoundMissing s t -> do
stillmissing <- findMissing (S.toList s) r
- pullremotes tmpr rmts fetchrefs (FsckFoundMissing stillmissing)
+ pullremotes tmpr rmts fetchrefs (FsckFoundMissing stillmissing t)
, pullremotes tmpr rmts fetchrefs ms
)
fetchfrom fetchurl ps = runBool $
@@ -278,7 +279,7 @@ findUncorruptedCommit missing goodcommits branch r = do
then return (Just c, gcs')
else findfirst gcs' cs
-{- Verifies tha none of the missing objects in the set are used by
+{- Verifies that none of the missing objects in the set are used by
- the commit. Also adds to a set of commit shas that have been verified to
- be good, which can be passed into subsequent calls to avoid
- redundant work when eg, chasing down branches to find the first
@@ -452,7 +453,7 @@ runRepair' removablebranch fsckresult forced referencerepo g = do
missing <- findBroken False g
stillmissing <- retrieveMissingObjects missing referencerepo g
case stillmissing of
- FsckFoundMissing s
+ FsckFoundMissing s t
| S.null s -> if repoIsLocalBare g
then successfulfinish []
else ifM (checkIndex g)
@@ -465,7 +466,7 @@ runRepair' removablebranch fsckresult forced referencerepo g = do
)
| otherwise -> if forced
then ifM (checkIndex g)
- ( continuerepairs s
+ ( forcerepair s t
, corruptedindex
)
else do
@@ -478,17 +479,16 @@ runRepair' removablebranch fsckresult forced referencerepo g = do
| forced -> ifM (pure (repoIsLocalBare g) <||> checkIndex g)
( do
cleanCorruptObjects FsckFailed g
- missing' <- findBroken False g
- case missing' of
+ stillmissing' <- findBroken False g
+ case stillmissing' of
FsckFailed -> return (False, [])
- FsckFoundMissing stillmissing' ->
- continuerepairs stillmissing'
+ FsckFoundMissing s t -> forcerepair s t
, corruptedindex
)
| otherwise -> unsuccessfulfinish
where
- continuerepairs stillmissing = do
- (removedbranches, goodcommits) <- removeBadBranches removablebranch stillmissing emptyGoodCommits g
+ repairbranches missing = do
+ (removedbranches, goodcommits) <- removeBadBranches removablebranch missing emptyGoodCommits g
let remotebranches = filter isTrackingBranch removedbranches
unless (null remotebranches) $
putStrLn $ unwords
@@ -496,32 +496,43 @@ runRepair' removablebranch fsckresult forced referencerepo g = do
, show (length remotebranches)
, "remote tracking branches that referred to missing objects."
]
- (resetbranches, deletedbranches, _) <- resetLocalBranches stillmissing goodcommits g
+ (resetbranches, deletedbranches, _) <- resetLocalBranches missing goodcommits g
displayList (map fromRef resetbranches)
"Reset these local branches to old versions before the missing objects were committed:"
displayList (map fromRef deletedbranches)
"Deleted these local branches, which could not be recovered due to missing objects:"
+ return (resetbranches ++ deletedbranches)
+
+ forcerepair missing fscktruncated = do
+ modifiedbranches <- repairbranches missing
deindexedfiles <- rewriteIndex g
displayList deindexedfiles
"Removed these missing files from the index. You should look at what files are present in your working tree and git add them back to the index when appropriate."
- let modifiedbranches = resetbranches ++ deletedbranches
- if null resetbranches && null deletedbranches
- then successfulfinish modifiedbranches
- else do
- unless (repoIsLocalBare g) $ do
- mcurr <- Branch.currentUnsafe g
- case mcurr of
- Nothing -> return ()
- Just curr -> when (any (== curr) modifiedbranches) $ do
+
+ -- When the fsck results were truncated, try
+ -- fscking again, and as long as different
+ -- missing objects are found, continue
+ -- the repair process.
+ if fscktruncated
+ then do
+ fsckresult' <- findBroken False g
+ case fsckresult' of
+ FsckFailed -> do
+ putStrLn "git fsck is failing"
+ return (False, modifiedbranches)
+ FsckFoundMissing s _
+ | S.null s -> successfulfinish modifiedbranches
+ | S.null (s `S.difference` missing) -> do
putStrLn $ unwords
- [ "You currently have"
- , fromRef curr
- , "checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch!"
+ [ show (S.size s)
+ , "missing objects could not be recovered!"
]
- putStrLn "Successfully recovered repository!"
- putStrLn "Please carefully check that the changes mentioned above are ok.."
- return (True, modifiedbranches)
-
+ return (False, modifiedbranches)
+ | otherwise -> do
+ (ok, modifiedbranches') <- runRepairOf fsckresult' removablebranch forced referencerepo g
+ return (ok, modifiedbranches++modifiedbranches')
+ else successfulfinish modifiedbranches
+
corruptedindex = do
nukeFile (indexFile g)
-- The corrupted index can prevent fsck from finding other
@@ -531,12 +542,28 @@ runRepair' removablebranch fsckresult forced referencerepo g = do
putStrLn "Removed the corrupted index file. You should look at what files are present in your working tree and git add them back to the index when appropriate."
return result
- successfulfinish modifiedbranches = do
- mapM_ putStrLn
- [ "Successfully recovered repository!"
- , "You should run \"git fsck\" to make sure, but it looks like everything was recovered ok."
- ]
- return (True, modifiedbranches)
+ successfulfinish modifiedbranches
+ | null modifiedbranches = do
+ mapM_ putStrLn
+ [ "Successfully recovered repository!"
+ , "You should run \"git fsck\" to make sure, but it looks like everything was recovered ok."
+ ]
+ return (True, modifiedbranches)
+ | otherwise = do
+ unless (repoIsLocalBare g) $ do
+ mcurr <- Branch.currentUnsafe g
+ case mcurr of
+ Nothing -> return ()
+ Just curr -> when (any (== curr) modifiedbranches) $ do
+ putStrLn $ unwords
+ [ "You currently have"
+ , fromRef curr
+ , "checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch!"
+ ]
+ putStrLn "Successfully recovered repository!"
+ putStrLn "Please carefully check that the changes mentioned above are ok.."
+ return (True, modifiedbranches)
+
unsuccessfulfinish = do
if repoIsLocalBare g
then do
diff --git a/Logs/FsckResults.hs b/Logs/FsckResults.hs
index 3538bdc40..619dd586c 100644
--- a/Logs/FsckResults.hs
+++ b/Logs/FsckResults.hs
@@ -23,25 +23,31 @@ writeFsckResults u fsckresults = do
logfile <- fromRepo $ gitAnnexFsckResultsLog u
liftIO $
case fsckresults of
- FsckFailed -> store S.empty logfile
- FsckFoundMissing s
+ FsckFailed -> store S.empty False logfile
+ FsckFoundMissing s t
| S.null s -> nukeFile logfile
- | otherwise -> store s logfile
+ | otherwise -> store s t logfile
where
- store s logfile = do
+ store s t logfile = do
createDirectoryIfMissing True (parentDir logfile)
- liftIO $ viaTmp writeFile logfile $ serialize s
- serialize = unlines . map fromRef . S.toList
+ liftIO $ viaTmp writeFile logfile $ serialize s t
+ serialize s t =
+ let ls = map fromRef (S.toList s)
+ in if t
+ then unlines ("truncated":ls)
+ else unlines ls
readFsckResults :: UUID -> Annex FsckResults
readFsckResults u = do
logfile <- fromRepo $ gitAnnexFsckResultsLog u
- liftIO $ catchDefaultIO (FsckFoundMissing S.empty) $
- deserialize <$> readFile logfile
+ liftIO $ catchDefaultIO (FsckFoundMissing S.empty False) $
+ deserialize . lines <$> readFile logfile
where
- deserialize l =
- let s = S.fromList $ map Ref $ lines l
- in if S.null s then FsckFailed else FsckFoundMissing s
+ deserialize ("truncated":ls) = deserialize' ls True
+ deserialize ls = deserialize' ls False
+ deserialize' ls t =
+ let s = S.fromList $ map Ref ls
+ in if S.null s then FsckFailed else FsckFoundMissing s t
clearFsckResults :: UUID -> Annex ()
clearFsckResults = liftIO . nukeFile <=< fromRepo . gitAnnexFsckResultsLog
diff --git a/doc/bugs/enormous_fsck_output_OOM.mdwn b/doc/bugs/enormous_fsck_output_OOM.mdwn
index 975674b5c..b06655354 100644
--- a/doc/bugs/enormous_fsck_output_OOM.mdwn
+++ b/doc/bugs/enormous_fsck_output_OOM.mdwn
@@ -18,3 +18,13 @@ So I tried to follow your advice here and increase the stack:
git-annex: Most RTS options are disabled. Link with -rtsopts to enable them.
I wasn't sure what to do next, so any help would be appreciated.
+
+> Now only 20k problem shas max (more likely 10k) are collected from fsck,
+> so it won't use much memory (60 mb or so). If it had to truncate
+> shas from fsck, it will re-run fsck after the repair process,
+> which should either find no problems left (common when eg when all missing shas
+> were able to be fetched from remotes), or find a new set of problem
+> shas, which it can feed back through the repair process.
+>
+> If the repository is very large, this means more work, but it shouldn't
+> run out of memory now. [[fixed|done]] --[[Joey]]