diff options
author | Joey Hess <joey@kitenet.net> | 2013-10-21 15:28:06 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2013-10-21 15:28:06 -0400 |
commit | 727566ad23efc6c36a4a4473b3299af913f6ce03 (patch) | |
tree | 8dd85a47703021db232b02e47d371037cb0715c3 /Git | |
parent | 1f7604f7d9df831012893a9ee2b4a8be20fe02ab (diff) |
implemented removal of corrupt tracking branches
Oh, git, you made this so hard. Not determining if a branch pointed to some
corrupt object, that was easy, but dealing with corrupt branches using git
plumbing is a PITA.
Diffstat (limited to 'Git')
-rw-r--r-- | Git/Fsck.hs | 9 | ||||
-rw-r--r-- | Git/LsTree.hs | 9 | ||||
-rw-r--r-- | Git/RecoverRepository.hs | 181 |
3 files changed, 180 insertions, 19 deletions
diff --git a/Git/Fsck.hs b/Git/Fsck.hs index a43a84f3e..5fdc73385 100644 --- a/Git/Fsck.hs +++ b/Git/Fsck.hs @@ -7,7 +7,8 @@ module Git.Fsck ( findBroken, - findMissing + findMissing, + MissingObjects ) where import Common @@ -18,6 +19,8 @@ import Git.CatFile import qualified Data.Set as S +type MissingObjects = S.Set Sha + {- Runs fsck to find some of the broken objects in the repository. - May not find all broken objects, if fsck fails on bad data in some of - the broken objects it does find. If the fsck fails generally without @@ -28,7 +31,7 @@ import qualified Data.Set as S - to be a git sha. Not all such shas are of broken objects, so ask git - to try to cat the object, and see if it fails. -} -findBroken :: Repo -> IO (Maybe (S.Set Sha)) +findBroken :: Repo -> IO (Maybe MissingObjects) findBroken r = do (output, fsckok) <- processTranscript "git" (toCommand $ fsckParams r) Nothing let objs = parseFsckOutput output @@ -39,7 +42,7 @@ findBroken r = do {- Finds objects that are missing from the git repsitory, or are corrupt. - Note that catting a corrupt object will cause cat-file to crash. -} -findMissing :: [Sha] -> Repo -> IO (S.Set Sha) +findMissing :: [Sha] -> Repo -> IO MissingObjects findMissing objs r = go objs [] =<< start where start = catFileStart' False r diff --git a/Git/LsTree.hs b/Git/LsTree.hs index a2b47da3a..956f9f5b4 100644 --- a/Git/LsTree.hs +++ b/Git/LsTree.hs @@ -8,6 +8,7 @@ module Git.LsTree ( TreeItem(..), lsTree, + lsTreeParams, lsTreeFiles, parseLsTree ) where @@ -33,9 +34,11 @@ data TreeItem = TreeItem {- Lists the complete contents of a tree, recursing into sub-trees, - with lazy output. -} lsTree :: Ref -> Repo -> IO [TreeItem] -lsTree t repo = map parseLsTree <$> pipeNullSplitZombie ps repo - where - ps = [Params "ls-tree --full-tree -z -r --", File $ show t] +lsTree t repo = map parseLsTree + <$> pipeNullSplitZombie (lsTreeParams t) repo + +lsTreeParams :: Ref -> [CommandParam] +lsTreeParams t = [ Params "ls-tree --full-tree -z -r --", File $ show t ] {- Lists specified files in a tree. -} lsTreeFiles :: Ref -> [FilePath] -> Repo -> IO [TreeItem] diff --git a/Git/RecoverRepository.hs b/Git/RecoverRepository.hs index aad78d0dd..f40450f46 100644 --- a/Git/RecoverRepository.hs +++ b/Git/RecoverRepository.hs @@ -10,6 +10,7 @@ module Git.RecoverRepository ( retrieveMissingObjects, resetLocalBranches, removeTrackingBranches, + emptyGoodCommits, ) where import Common @@ -17,12 +18,12 @@ import Git import Git.Command import Git.Fsck import Git.Objects -import Git.HashObject -import Git.Types +import Git.Sha import qualified Git.Config import qualified Git.Construct +import qualified Git.LsTree as LsTree +import qualified Git.Ref as Ref import Utility.Tmp -import Utility.Monad import Utility.Rsync import qualified Data.Set as S @@ -39,7 +40,7 @@ import System.Log.Logger - To remove corrupt objects, unpack all packs, and remove the packs - (to handle corrupt packs), and remove loose object files. -} -cleanCorruptObjects :: Repo -> IO (S.Set Sha) +cleanCorruptObjects :: Repo -> IO MissingObjects cleanCorruptObjects r = do notice "Running git fsck ..." check =<< findBroken r @@ -79,7 +80,7 @@ cleanCorruptObjects r = do then return s else retry s -removeLoose :: Repo -> S.Set Sha -> IO Bool +removeLoose :: Repo -> MissingObjects -> IO Bool removeLoose r s = do let fs = map (looseObjectFile r) (S.toList s) count <- length <$> filterM doesFileExist fs @@ -115,7 +116,7 @@ explodePacks r = do {- Try to retrieve a set of missing objects, from the remotes of a - repository. Returns any that could not be retreived. -} -retrieveMissingObjects :: S.Set Sha -> Repo -> IO (S.Set Sha) +retrieveMissingObjects :: MissingObjects -> Repo -> IO MissingObjects retrieveMissingObjects missing r | S.null missing = return missing | otherwise = withTmpDir "tmprepo" $ \tmpdir -> do @@ -127,7 +128,7 @@ retrieveMissingObjects missing r then return stillmissing else pullremotes tmpr (remotes r) fetchallrefs stillmissing where - pullremotes tmpr [] _ stillmissing = return stillmissing + pullremotes _tmpr [] _ stillmissing = return stillmissing pullremotes tmpr (rmt:rmts) fetchrefs s | S.null s = return s | otherwise = do @@ -170,16 +171,170 @@ copyObjects srcr destr = rsync - local branches to point to an old commit before the missing - objects. -} -resetLocalBranches :: S.Set Sha -> Repo -> IO [Branch] -resetLocalBranches missing r = do +resetLocalBranches :: MissingObjects -> GoodCommits -> Repo -> IO [Branch] +resetLocalBranches missing goodcommits r = do error "TODO" {- To deal with missing objects that cannot be recovered, removes - - any remote tracking branches that reference them. + - any remote tracking branches that reference them. Returns a list of + - all removed branches. -} -removeTrackingBranches :: S.Set Sha -> Repo -> IO [Branch] -removeTrackingBranches missing r = do - error "TODO" +removeTrackingBranches :: MissingObjects -> GoodCommits -> Repo -> IO ([Branch], GoodCommits) +removeTrackingBranches missing goodcommits r = + go [] goodcommits =<< filter istrackingbranch <$> getAllRefs r + where + istrackingbranch b = "refs/remotes/" `isPrefixOf` show b + go removed gcs [] = return (removed, gcs) + go removed gcs (b:bs) = do + (ok, gcs') <- verifyCommit missing gcs b r + if ok + then go removed gcs' bs + else do + nukeBranchRef b r + go (b:removed) gcs' bs + +{- Gets all refs, including ones that are corrupt. + - git show-ref does not output refs to commits that are directly + - corrupted, so it is not used. + -} +getAllRefs :: Repo -> IO [Ref] +getAllRefs r = do + packedrs <- mapMaybe parsePacked . lines + <$> catchDefaultIO "" (readFile $ packedRefsFile r) + loosers <- map toref <$> dirContentsRecursive (localGitDir r </> "refs") + return $ packedrs ++ loosers + where + refdir = localGitDir r </> "refs" + toref = Ref . relPathDirToFile (localGitDir r) + +packedRefsFile :: Repo -> FilePath +packedRefsFile r = localGitDir r </> "packed-refs" + +parsePacked :: String -> Maybe Ref +parsePacked l = case words l of + (sha:ref:[]) + | isJust (extractSha sha) -> Just $ Ref ref + _ -> Nothing + +{- git-branch -d cannot be used to remove a branch that is directly + - pointing to a corrupt commit. However, it's tried first. -} +nukeBranchRef :: Branch -> Repo -> IO () +nukeBranchRef b r = void $ usegit <||> byhand + where + usegit = runBool + [ Param "branch" + , Params "-r -d" + , Param $ show $ Ref.base b + ] r + byhand = do + nukeFile $ localGitDir r </> show b + whenM (doesFileExist packedrefs) $ + withTmpFile "packed-refs" $ \tmp h -> do + ls <- lines <$> readFile packedrefs + hPutStr h $ unlines $ + filter (not . skiprefline) ls + hClose h + renameFile tmp packedrefs + return True + skiprefline l = case parsePacked l of + Just packedref + | packedref == b -> True + _ -> False + packedrefs = packedRefsFile r + +{- Finds the most recent commit to a branch that does not need any + - of the missing objects. If the input branch is good as-is, returns it. + - Otherwise, tries to traverse the commits in the branch to find one + - that is ok (might fail, if one of them is corrupt). + -} +findUncorruptedCommit :: MissingObjects -> GoodCommits -> Branch -> Repo -> IO (Maybe Sha, GoodCommits) +findUncorruptedCommit missing goodcommits branch r = do + (ok, goodcommits') <- verifyCommit missing goodcommits branch r + if ok + then return (Just branch, goodcommits') + else do + (ls, cleanup) <- pipeNullSplit + [ Param "log" + , Param "--format=%H" + , Param (show branch) + ] r + cleanup `after` findfirst goodcommits (catMaybes $ map extractSha ls) + where + findfirst gcs [] = return (Nothing, gcs) + findfirst gcs (c:cs) = do + (ok, gcs') <- verifyCommit missing gcs c r + if ok + then return (Just c, gcs') + else findfirst gcs' cs + +{- Looks through the reflog to find an old version of a branch that + - does not need any of the missing objects. + -} +findOldBranch :: MissingObjects -> GoodCommits -> Branch -> Repo -> IO (Maybe Sha, GoodCommits) +findOldBranch missing goodcommits branch r = error "TODO" + +{- Verifies tha none of the missing objects in the set are used by + - the commit. Also adds to a set of commit shas that have been verified to + - be good, which can be passed into subsequent calls to avoid + - redundant work when eg, chasing down branches to find the first + - uncorrupted commit. -} +verifyCommit :: MissingObjects -> GoodCommits -> Sha -> Repo -> IO (Bool, GoodCommits) +verifyCommit missing goodcommits commit r + | checkGoodCommit commit goodcommits = return (True, goodcommits) + | otherwise = do + (ls, cleanup) <- pipeNullSplit + [ Param "log" + , Param "--format=%H %T" + , Param (show commit) + ] r + let committrees = map parse ls + if any isNothing committrees + then do + void cleanup + return (False, goodcommits) + else do + let cts = catMaybes committrees + ifM (cleanup <&&> check cts) + ( return (True, addGoodCommits (map fst cts) goodcommits) + , return (False, goodcommits) + ) + where + parse l = case words l of + (commitsha:treesha:[]) -> (,) + <$> extractSha commitsha + <*> extractSha treesha + _ -> Nothing + check [] = return False + check ((commit, tree):rest) + | checkGoodCommit commit goodcommits = return True + | otherwise = verifyTree missing tree r <&&> check rest + +{- Verifies that a tree is good, including all trees and blobs + - referenced by it. -} +verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool +verifyTree missing treesha r + | S.member treesha missing = return False + | otherwise = do + (ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams treesha) r + let objshas = map (extractSha . LsTree.sha . LsTree.parseLsTree) ls + if any isNothing objshas || any (`S.member` missing) (catMaybes objshas) + then do + void cleanup + return False + -- as long as ls-tree succeeded, we're good + else cleanup + +newtype GoodCommits = GoodCommits (S.Set Sha) + +emptyGoodCommits :: GoodCommits +emptyGoodCommits = GoodCommits S.empty + +checkGoodCommit :: Sha -> GoodCommits -> Bool +checkGoodCommit sha (GoodCommits s) = S.member sha s + +addGoodCommits :: [Sha] -> GoodCommits -> GoodCommits +addGoodCommits shas (GoodCommits s) = GoodCommits $ + S.union s (S.fromList shas) notice :: String -> IO () notice = noticeM "RecoverRepository" |