From 727566ad23efc6c36a4a4473b3299af913f6ce03 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 21 Oct 2013 15:28:06 -0400 Subject: implemented removal of corrupt tracking branches Oh, git, you made this so hard. Not determining if a branch pointed to some corrupt object, that was easy, but dealing with corrupt branches using git plumbing is a PITA. --- Git/Fsck.hs | 9 +- Git/LsTree.hs | 9 +- Git/RecoverRepository.hs | 181 +++++++++++++++++++++++++++++++++++++--- doc/git-recover-repository.mdwn | 16 +++- git-recover-repository.hs | 4 +- 5 files changed, 195 insertions(+), 24 deletions(-) diff --git a/Git/Fsck.hs b/Git/Fsck.hs index a43a84f3e..5fdc73385 100644 --- a/Git/Fsck.hs +++ b/Git/Fsck.hs @@ -7,7 +7,8 @@ module Git.Fsck ( findBroken, - findMissing + findMissing, + MissingObjects ) where import Common @@ -18,6 +19,8 @@ import Git.CatFile import qualified Data.Set as S +type MissingObjects = S.Set Sha + {- Runs fsck to find some of the broken objects in the repository. - May not find all broken objects, if fsck fails on bad data in some of - the broken objects it does find. If the fsck fails generally without @@ -28,7 +31,7 @@ import qualified Data.Set as S - to be a git sha. Not all such shas are of broken objects, so ask git - to try to cat the object, and see if it fails. -} -findBroken :: Repo -> IO (Maybe (S.Set Sha)) +findBroken :: Repo -> IO (Maybe MissingObjects) findBroken r = do (output, fsckok) <- processTranscript "git" (toCommand $ fsckParams r) Nothing let objs = parseFsckOutput output @@ -39,7 +42,7 @@ findBroken r = do {- Finds objects that are missing from the git repsitory, or are corrupt. - Note that catting a corrupt object will cause cat-file to crash. -} -findMissing :: [Sha] -> Repo -> IO (S.Set Sha) +findMissing :: [Sha] -> Repo -> IO MissingObjects findMissing objs r = go objs [] =<< start where start = catFileStart' False r diff --git a/Git/LsTree.hs b/Git/LsTree.hs index a2b47da3a..956f9f5b4 100644 --- a/Git/LsTree.hs +++ b/Git/LsTree.hs @@ -8,6 +8,7 @@ module Git.LsTree ( TreeItem(..), lsTree, + lsTreeParams, lsTreeFiles, parseLsTree ) where @@ -33,9 +34,11 @@ data TreeItem = TreeItem {- Lists the complete contents of a tree, recursing into sub-trees, - with lazy output. -} lsTree :: Ref -> Repo -> IO [TreeItem] -lsTree t repo = map parseLsTree <$> pipeNullSplitZombie ps repo - where - ps = [Params "ls-tree --full-tree -z -r --", File $ show t] +lsTree t repo = map parseLsTree + <$> pipeNullSplitZombie (lsTreeParams t) repo + +lsTreeParams :: Ref -> [CommandParam] +lsTreeParams t = [ Params "ls-tree --full-tree -z -r --", File $ show t ] {- Lists specified files in a tree. -} lsTreeFiles :: Ref -> [FilePath] -> Repo -> IO [TreeItem] diff --git a/Git/RecoverRepository.hs b/Git/RecoverRepository.hs index aad78d0dd..f40450f46 100644 --- a/Git/RecoverRepository.hs +++ b/Git/RecoverRepository.hs @@ -10,6 +10,7 @@ module Git.RecoverRepository ( retrieveMissingObjects, resetLocalBranches, removeTrackingBranches, + emptyGoodCommits, ) where import Common @@ -17,12 +18,12 @@ import Git import Git.Command import Git.Fsck import Git.Objects -import Git.HashObject -import Git.Types +import Git.Sha import qualified Git.Config import qualified Git.Construct +import qualified Git.LsTree as LsTree +import qualified Git.Ref as Ref import Utility.Tmp -import Utility.Monad import Utility.Rsync import qualified Data.Set as S @@ -39,7 +40,7 @@ import System.Log.Logger - To remove corrupt objects, unpack all packs, and remove the packs - (to handle corrupt packs), and remove loose object files. -} -cleanCorruptObjects :: Repo -> IO (S.Set Sha) +cleanCorruptObjects :: Repo -> IO MissingObjects cleanCorruptObjects r = do notice "Running git fsck ..." check =<< findBroken r @@ -79,7 +80,7 @@ cleanCorruptObjects r = do then return s else retry s -removeLoose :: Repo -> S.Set Sha -> IO Bool +removeLoose :: Repo -> MissingObjects -> IO Bool removeLoose r s = do let fs = map (looseObjectFile r) (S.toList s) count <- length <$> filterM doesFileExist fs @@ -115,7 +116,7 @@ explodePacks r = do {- Try to retrieve a set of missing objects, from the remotes of a - repository. Returns any that could not be retreived. -} -retrieveMissingObjects :: S.Set Sha -> Repo -> IO (S.Set Sha) +retrieveMissingObjects :: MissingObjects -> Repo -> IO MissingObjects retrieveMissingObjects missing r | S.null missing = return missing | otherwise = withTmpDir "tmprepo" $ \tmpdir -> do @@ -127,7 +128,7 @@ retrieveMissingObjects missing r then return stillmissing else pullremotes tmpr (remotes r) fetchallrefs stillmissing where - pullremotes tmpr [] _ stillmissing = return stillmissing + pullremotes _tmpr [] _ stillmissing = return stillmissing pullremotes tmpr (rmt:rmts) fetchrefs s | S.null s = return s | otherwise = do @@ -170,16 +171,170 @@ copyObjects srcr destr = rsync - local branches to point to an old commit before the missing - objects. -} -resetLocalBranches :: S.Set Sha -> Repo -> IO [Branch] -resetLocalBranches missing r = do +resetLocalBranches :: MissingObjects -> GoodCommits -> Repo -> IO [Branch] +resetLocalBranches missing goodcommits r = do error "TODO" {- To deal with missing objects that cannot be recovered, removes - - any remote tracking branches that reference them. + - any remote tracking branches that reference them. Returns a list of + - all removed branches. -} -removeTrackingBranches :: S.Set Sha -> Repo -> IO [Branch] -removeTrackingBranches missing r = do - error "TODO" +removeTrackingBranches :: MissingObjects -> GoodCommits -> Repo -> IO ([Branch], GoodCommits) +removeTrackingBranches missing goodcommits r = + go [] goodcommits =<< filter istrackingbranch <$> getAllRefs r + where + istrackingbranch b = "refs/remotes/" `isPrefixOf` show b + go removed gcs [] = return (removed, gcs) + go removed gcs (b:bs) = do + (ok, gcs') <- verifyCommit missing gcs b r + if ok + then go removed gcs' bs + else do + nukeBranchRef b r + go (b:removed) gcs' bs + +{- Gets all refs, including ones that are corrupt. + - git show-ref does not output refs to commits that are directly + - corrupted, so it is not used. + -} +getAllRefs :: Repo -> IO [Ref] +getAllRefs r = do + packedrs <- mapMaybe parsePacked . lines + <$> catchDefaultIO "" (readFile $ packedRefsFile r) + loosers <- map toref <$> dirContentsRecursive (localGitDir r "refs") + return $ packedrs ++ loosers + where + refdir = localGitDir r "refs" + toref = Ref . relPathDirToFile (localGitDir r) + +packedRefsFile :: Repo -> FilePath +packedRefsFile r = localGitDir r "packed-refs" + +parsePacked :: String -> Maybe Ref +parsePacked l = case words l of + (sha:ref:[]) + | isJust (extractSha sha) -> Just $ Ref ref + _ -> Nothing + +{- git-branch -d cannot be used to remove a branch that is directly + - pointing to a corrupt commit. However, it's tried first. -} +nukeBranchRef :: Branch -> Repo -> IO () +nukeBranchRef b r = void $ usegit <||> byhand + where + usegit = runBool + [ Param "branch" + , Params "-r -d" + , Param $ show $ Ref.base b + ] r + byhand = do + nukeFile $ localGitDir r show b + whenM (doesFileExist packedrefs) $ + withTmpFile "packed-refs" $ \tmp h -> do + ls <- lines <$> readFile packedrefs + hPutStr h $ unlines $ + filter (not . skiprefline) ls + hClose h + renameFile tmp packedrefs + return True + skiprefline l = case parsePacked l of + Just packedref + | packedref == b -> True + _ -> False + packedrefs = packedRefsFile r + +{- Finds the most recent commit to a branch that does not need any + - of the missing objects. If the input branch is good as-is, returns it. + - Otherwise, tries to traverse the commits in the branch to find one + - that is ok (might fail, if one of them is corrupt). + -} +findUncorruptedCommit :: MissingObjects -> GoodCommits -> Branch -> Repo -> IO (Maybe Sha, GoodCommits) +findUncorruptedCommit missing goodcommits branch r = do + (ok, goodcommits') <- verifyCommit missing goodcommits branch r + if ok + then return (Just branch, goodcommits') + else do + (ls, cleanup) <- pipeNullSplit + [ Param "log" + , Param "--format=%H" + , Param (show branch) + ] r + cleanup `after` findfirst goodcommits (catMaybes $ map extractSha ls) + where + findfirst gcs [] = return (Nothing, gcs) + findfirst gcs (c:cs) = do + (ok, gcs') <- verifyCommit missing gcs c r + if ok + then return (Just c, gcs') + else findfirst gcs' cs + +{- Looks through the reflog to find an old version of a branch that + - does not need any of the missing objects. + -} +findOldBranch :: MissingObjects -> GoodCommits -> Branch -> Repo -> IO (Maybe Sha, GoodCommits) +findOldBranch missing goodcommits branch r = error "TODO" + +{- Verifies tha none of the missing objects in the set are used by + - the commit. Also adds to a set of commit shas that have been verified to + - be good, which can be passed into subsequent calls to avoid + - redundant work when eg, chasing down branches to find the first + - uncorrupted commit. -} +verifyCommit :: MissingObjects -> GoodCommits -> Sha -> Repo -> IO (Bool, GoodCommits) +verifyCommit missing goodcommits commit r + | checkGoodCommit commit goodcommits = return (True, goodcommits) + | otherwise = do + (ls, cleanup) <- pipeNullSplit + [ Param "log" + , Param "--format=%H %T" + , Param (show commit) + ] r + let committrees = map parse ls + if any isNothing committrees + then do + void cleanup + return (False, goodcommits) + else do + let cts = catMaybes committrees + ifM (cleanup <&&> check cts) + ( return (True, addGoodCommits (map fst cts) goodcommits) + , return (False, goodcommits) + ) + where + parse l = case words l of + (commitsha:treesha:[]) -> (,) + <$> extractSha commitsha + <*> extractSha treesha + _ -> Nothing + check [] = return False + check ((commit, tree):rest) + | checkGoodCommit commit goodcommits = return True + | otherwise = verifyTree missing tree r <&&> check rest + +{- Verifies that a tree is good, including all trees and blobs + - referenced by it. -} +verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool +verifyTree missing treesha r + | S.member treesha missing = return False + | otherwise = do + (ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams treesha) r + let objshas = map (extractSha . LsTree.sha . LsTree.parseLsTree) ls + if any isNothing objshas || any (`S.member` missing) (catMaybes objshas) + then do + void cleanup + return False + -- as long as ls-tree succeeded, we're good + else cleanup + +newtype GoodCommits = GoodCommits (S.Set Sha) + +emptyGoodCommits :: GoodCommits +emptyGoodCommits = GoodCommits S.empty + +checkGoodCommit :: Sha -> GoodCommits -> Bool +checkGoodCommit sha (GoodCommits s) = S.member sha s + +addGoodCommits :: [Sha] -> GoodCommits -> GoodCommits +addGoodCommits shas (GoodCommits s) = GoodCommits $ + S.union s (S.fromList shas) notice :: String -> IO () notice = noticeM "RecoverRepository" diff --git a/doc/git-recover-repository.mdwn b/doc/git-recover-repository.mdwn index b05903d14..d4972e961 100644 --- a/doc/git-recover-repository.mdwn +++ b/doc/git-recover-repository.mdwn @@ -15,9 +15,19 @@ It does by deleting all corrupt objects, and retreiving all missing objects that it can from the remotes of the repository. If that is not sufficient to fully recover the repository, it can also -reset branches back to commits before the corruption happened. It will only -do this if run with the --force option, since that rewrites history -and throws out missing data. +reset branches back to commits before the corruption happened, and delete +branches that are no longer available due to the lost data. It will only +do this if run with the `--force` option, since that rewrites history +and throws out missing data. Note that the `--force` option never touches +tags, even if they are no longer usable due to missing data. + +After running this command, you will probably want to run `git fsck` to +verify it fixed the repository. Note that fsck may still complain about +objects referenced by the reflog, if they were unable to be recovered. +Use `git fsck --no-reflogs` to skip such objects. + +Since this command unpacks all packs in the repository, you may want to +run `git gc` afterwards. # AUTHOR diff --git a/git-recover-repository.hs b/git-recover-repository.hs index 6654057fa..66b1708f8 100644 --- a/git-recover-repository.hs +++ b/git-recover-repository.hs @@ -59,14 +59,14 @@ main = do ] if forced then do - remotebranches <- Git.RecoverRepository.removeTrackingBranches stillmissing g + (remotebranches, goodcommits) <- Git.RecoverRepository.removeTrackingBranches stillmissing Git.RecoverRepository.emptyGoodCommits g unless (null remotebranches) $ putStrLn $ unwords [ "removed" , show (length remotebranches) , "remote tracking branches that referred to missing objects" ] - localbranches <- Git.RecoverRepository.resetLocalBranches stillmissing g + localbranches <- Git.RecoverRepository.resetLocalBranches stillmissing goodcommits g unless (null localbranches) $ do putStrLn "Reset these local branches to old versions before the missing objects were committed:" putStr $ unlines $ map show localbranches -- cgit v1.2.3