summaryrefslogtreecommitdiff
path: root/Git/Repair.hs
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2013-10-23 12:58:01 -0400
committerGravatar Joey Hess <joey@kitenet.net>2013-10-23 13:00:45 -0400
commitb77b0848ee28093b63e3ab0d1ea494e430ffe58b (patch)
tree7bcc235a07319bb4461373b5d0656ddc90aad357 /Git/Repair.hs
parentfb2ccfd60ff09c1b1d03838d42eba3c65fd7fb27 (diff)
repair command: add handling of git-annex branch and index
Diffstat (limited to 'Git/Repair.hs')
-rw-r--r--Git/Repair.hs484
1 files changed, 484 insertions, 0 deletions
diff --git a/Git/Repair.hs b/Git/Repair.hs
new file mode 100644
index 000000000..bb540fbd7
--- /dev/null
+++ b/Git/Repair.hs
@@ -0,0 +1,484 @@
+{- git repository recovery
+ -
+ - Copyright 2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Git.Repair (
+ runRepair,
+ cleanCorruptObjects,
+ retrieveMissingObjects,
+ resetLocalBranches,
+ removeTrackingBranches,
+ rewriteIndex,
+ checkIndex,
+ emptyGoodCommits,
+) where
+
+import Common
+import Git
+import Git.Command
+import Git.Objects
+import Git.Sha
+import Git.Types
+import Git.Fsck
+import qualified Git.Config as Config
+import qualified Git.Construct as Construct
+import qualified Git.LsTree as LsTree
+import qualified Git.LsFiles as LsFiles
+import qualified Git.Ref as Ref
+import qualified Git.RefLog as RefLog
+import qualified Git.UpdateIndex as UpdateIndex
+import qualified Git.Branch as Branch
+import Utility.Tmp
+import Utility.Rsync
+
+import qualified Data.Set as S
+import qualified Data.ByteString.Lazy as L
+import Data.Tuple.Utils
+
+{- Given a set of bad objects found by git fsck, removes all
+ - corrupt objects, and returns a list of missing objects,
+ - which need to be found elsewhere to finish recovery.
+ -
+ - Since git fsck may crash on corrupt objects, and so not
+ - report the full set of corrupt or missing objects,
+ - this removes corrupt objects, and re-runs fsck, until it
+ - stabalizes.
+ -
+ - To remove corrupt objects, unpack all packs, and remove the packs
+ - (to handle corrupt packs), and remove loose object files.
+ -}
+cleanCorruptObjects :: FsckResults -> Repo -> IO MissingObjects
+cleanCorruptObjects mmissing r = check mmissing
+ where
+ check Nothing = do
+ putStrLn "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file?"
+ ifM (explodePacks r)
+ ( retry S.empty
+ , return S.empty
+ )
+ check (Just bad)
+ | S.null bad = return S.empty
+ | otherwise = do
+ putStrLn $ unwords
+ [ "git fsck found"
+ , show (S.size bad)
+ , "broken objects."
+ ]
+ exploded <- explodePacks r
+ removed <- removeLoose r bad
+ if exploded || removed
+ then retry bad
+ else return bad
+ retry oldbad = do
+ putStrLn "Re-running git fsck to see if it finds more problems."
+ v <- findBroken False r
+ case v of
+ Nothing -> error $ unwords
+ [ "git fsck found a problem, which was not corrected after removing"
+ , show (S.size oldbad)
+ , "corrupt objects."
+ ]
+ Just newbad -> do
+ removed <- removeLoose r newbad
+ let s = S.union oldbad newbad
+ if not removed || s == oldbad
+ then return s
+ else retry s
+
+removeLoose :: Repo -> MissingObjects -> IO Bool
+removeLoose r s = do
+ let fs = map (looseObjectFile r) (S.toList s)
+ count <- length <$> filterM doesFileExist fs
+ if (count > 0)
+ then do
+ putStrLn $ unwords
+ [ "removing"
+ , show count
+ , "corrupt loose objects"
+ ]
+ mapM_ nukeFile fs
+ return True
+ else return False
+
+explodePacks :: Repo -> IO Bool
+explodePacks r = do
+ packs <- listPackFiles r
+ if null packs
+ then return False
+ else do
+ putStrLn "Unpacking all pack files."
+ mapM_ go packs
+ return True
+ where
+ go packfile = do
+ -- May fail, if pack file is corrupt.
+ void $ tryIO $
+ pipeWrite [Param "unpack-objects"] r $ \h ->
+ L.hPut h =<< L.readFile packfile
+ nukeFile packfile
+ nukeFile $ packIdxFile packfile
+
+{- Try to retrieve a set of missing objects, from the remotes of a
+ - repository. Returns any that could not be retreived.
+ -}
+retrieveMissingObjects :: MissingObjects -> Repo -> IO MissingObjects
+retrieveMissingObjects missing r
+ | S.null missing = return missing
+ | otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
+ unlessM (boolSystem "git" [Params "init", File tmpdir]) $
+ error $ "failed to create temp repository in " ++ tmpdir
+ tmpr <- Config.read =<< Construct.fromAbsPath tmpdir
+ stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing
+ if S.null stillmissing
+ then return stillmissing
+ else pullremotes tmpr (remotes r) fetchallrefs stillmissing
+ where
+ pullremotes _tmpr [] _ stillmissing = return stillmissing
+ pullremotes tmpr (rmt:rmts) fetchrefs s
+ | S.null s = return s
+ | otherwise = do
+ putStrLn $ "Trying to recover missing objects from remote " ++ repoDescribe rmt
+ ifM (fetchsome rmt fetchrefs tmpr)
+ ( do
+ void $ copyObjects tmpr r
+ stillmissing <- findMissing (S.toList s) r
+ pullremotes tmpr rmts fetchrefs stillmissing
+ , do
+ putStrLn $ unwords
+ [ "failed to fetch from remote"
+ , repoDescribe rmt
+ , "(will continue without it, but making this remote available may improve recovery)"
+ ]
+ pullremotes tmpr rmts fetchrefs s
+ )
+ fetchsome rmt ps = runBool $
+ [ Param "fetch"
+ , Param (repoLocation rmt)
+ , Params "--force --update-head-ok --quiet"
+ ] ++ ps
+ -- fetch refs and tags
+ fetchrefstags = [ Param "+refs/heads/*:refs/heads/*", Param "--tags"]
+ -- Fetch all available refs (more likely to fail,
+ -- as the remote may have refs it refuses to send).
+ fetchallrefs = [ Param "+*:*" ]
+
+{- Copies all objects from the src repository to the dest repository.
+ - This is done using rsync, so it copies all missing object, and all
+ - objects they rely on. -}
+copyObjects :: Repo -> Repo -> IO Bool
+copyObjects srcr destr = rsync
+ [ Param "-qr"
+ , File $ addTrailingPathSeparator $ objectsDir srcr
+ , File $ addTrailingPathSeparator $ objectsDir destr
+ ]
+
+{- To deal with missing objects that cannot be recovered, resets any
+ - local branches to point to an old commit before the missing
+ - objects. Returns all branches that were changed, and deleted.
+ -}
+resetLocalBranches :: MissingObjects -> GoodCommits -> Repo -> IO ([Branch], [Branch], GoodCommits)
+resetLocalBranches missing goodcommits r =
+ go [] [] goodcommits =<< filter islocalbranch <$> getAllRefs r
+ where
+ islocalbranch b = "refs/heads/" `isPrefixOf` show b
+ go changed deleted gcs [] = return (changed, deleted, gcs)
+ go changed deleted gcs (b:bs) = do
+ (mc, gcs') <- findUncorruptedCommit missing gcs b r
+ case mc of
+ Just c
+ | c == b -> go changed deleted gcs' bs
+ | otherwise -> do
+ reset b c
+ go (b:changed) deleted gcs' bs
+ Nothing -> do
+ nukeBranchRef b r
+ go changed (b:deleted) gcs' bs
+ reset b c = do
+ nukeBranchRef b r
+ void $ runBool
+ [ Param "branch"
+ , Param (show $ Ref.base b)
+ , Param (show c)
+ ] r
+
+{- To deal with missing objects that cannot be recovered, removes
+ - any remote tracking branches that reference them. Returns a list of
+ - all removed branches.
+ -}
+removeTrackingBranches :: MissingObjects -> GoodCommits -> Repo -> IO ([Branch], GoodCommits)
+removeTrackingBranches missing goodcommits r =
+ go [] goodcommits =<< filter istrackingbranch <$> getAllRefs r
+ where
+ istrackingbranch b = "refs/remotes/" `isPrefixOf` show b
+ go removed gcs [] = return (removed, gcs)
+ go removed gcs (b:bs) = do
+ (ok, gcs') <- verifyCommit missing gcs b r
+ if ok
+ then go removed gcs' bs
+ else do
+ nukeBranchRef b r
+ go (b:removed) gcs' bs
+
+{- Gets all refs, including ones that are corrupt.
+ - git show-ref does not output refs to commits that are directly
+ - corrupted, so it is not used.
+ -}
+getAllRefs :: Repo -> IO [Ref]
+getAllRefs r = do
+ packedrs <- mapMaybe parsePacked . lines
+ <$> catchDefaultIO "" (readFile $ packedRefsFile r)
+ loosers <- map toref <$> dirContentsRecursive refdir
+ return $ packedrs ++ loosers
+ where
+ refdir = localGitDir r </> "refs"
+ toref = Ref . relPathDirToFile (localGitDir r)
+
+packedRefsFile :: Repo -> FilePath
+packedRefsFile r = localGitDir r </> "packed-refs"
+
+parsePacked :: String -> Maybe Ref
+parsePacked l = case words l of
+ (sha:ref:[])
+ | isJust (extractSha sha) -> Just $ Ref ref
+ _ -> Nothing
+
+{- git-branch -d cannot be used to remove a branch that is directly
+ - pointing to a corrupt commit. However, it's tried first. -}
+nukeBranchRef :: Branch -> Repo -> IO ()
+nukeBranchRef b r = void $ usegit <||> byhand
+ where
+ usegit = runBool
+ [ Param "branch"
+ , Params "-r -d"
+ , Param $ show $ Ref.base b
+ ] r
+ byhand = do
+ nukeFile $ localGitDir r </> show b
+ whenM (doesFileExist packedrefs) $
+ withTmpFile "packed-refs" $ \tmp h -> do
+ ls <- lines <$> readFile packedrefs
+ hPutStr h $ unlines $
+ filter (not . skiprefline) ls
+ hClose h
+ renameFile tmp packedrefs
+ return True
+ skiprefline l = case parsePacked l of
+ Just packedref
+ | packedref == b -> True
+ _ -> False
+ packedrefs = packedRefsFile r
+
+{- Finds the most recent commit to a branch that does not need any
+ - of the missing objects. If the input branch is good as-is, returns it.
+ - Otherwise, tries to traverse the commits in the branch to find one
+ - that is ok. That might fail, if one of them is corrupt, or if an object
+ - at the root of the branch is missing. Finally, looks for an old version
+ - of the branch from the reflog.
+ -}
+findUncorruptedCommit :: MissingObjects -> GoodCommits -> Branch -> Repo -> IO (Maybe Sha, GoodCommits)
+findUncorruptedCommit missing goodcommits branch r = do
+ (ok, goodcommits') <- verifyCommit missing goodcommits branch r
+ if ok
+ then return (Just branch, goodcommits')
+ else do
+ (ls, cleanup) <- pipeNullSplit
+ [ Param "log"
+ , Param "-z"
+ , Param "--format=%H"
+ , Param (show branch)
+ ] r
+ let branchshas = catMaybes $ map extractSha ls
+ reflogshas <- RefLog.get branch r
+ -- XXX Could try a bit harder here, and look
+ -- for uncorrupted old commits in branches in the
+ -- reflog.
+ cleanup `after` findfirst goodcommits (branchshas ++ reflogshas)
+ where
+ findfirst gcs [] = return (Nothing, gcs)
+ findfirst gcs (c:cs) = do
+ (ok, gcs') <- verifyCommit missing gcs c r
+ if ok
+ then return (Just c, gcs')
+ else findfirst gcs' cs
+
+{- Verifies tha none of the missing objects in the set are used by
+ - the commit. Also adds to a set of commit shas that have been verified to
+ - be good, which can be passed into subsequent calls to avoid
+ - redundant work when eg, chasing down branches to find the first
+ - uncorrupted commit. -}
+verifyCommit :: MissingObjects -> GoodCommits -> Sha -> Repo -> IO (Bool, GoodCommits)
+verifyCommit missing goodcommits commit r
+ | checkGoodCommit commit goodcommits = return (True, goodcommits)
+ | otherwise = do
+ (ls, cleanup) <- pipeNullSplit
+ [ Param "log"
+ , Param "-z"
+ , Param "--format=%H %T"
+ , Param (show commit)
+ ] r
+ let committrees = map parse ls
+ if any isNothing committrees || null committrees
+ then do
+ void cleanup
+ return (False, goodcommits)
+ else do
+ let cts = catMaybes committrees
+ ifM (cleanup <&&> check cts)
+ ( return (True, addGoodCommits (map fst cts) goodcommits)
+ , return (False, goodcommits)
+ )
+ where
+ parse l = case words l of
+ (commitsha:treesha:[]) -> (,)
+ <$> extractSha commitsha
+ <*> extractSha treesha
+ _ -> Nothing
+ check [] = return True
+ check ((c, t):rest)
+ | checkGoodCommit c goodcommits = return True
+ | otherwise = verifyTree missing t r <&&> check rest
+
+{- Verifies that a tree is good, including all trees and blobs
+ - referenced by it. -}
+verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool
+verifyTree missing treesha r
+ | S.member treesha missing = return False
+ | otherwise = do
+ (ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams treesha) r
+ let objshas = map (extractSha . LsTree.sha . LsTree.parseLsTree) ls
+ if any isNothing objshas || any (`S.member` missing) (catMaybes objshas)
+ then do
+ void cleanup
+ return False
+ -- as long as ls-tree succeeded, we're good
+ else cleanup
+
+{- Checks that the index file only refers to objects that are not missing. -}
+checkIndex :: MissingObjects -> Repo -> IO Bool
+checkIndex missing r = do
+ (bad, _good, cleanup) <- partitionIndex missing r
+ if null bad
+ then cleanup
+ else do
+ void cleanup
+ return False
+
+partitionIndex :: MissingObjects -> Repo -> IO ([LsFiles.StagedDetails], [LsFiles.StagedDetails], IO Bool)
+partitionIndex missing r = do
+ (indexcontents, cleanup) <- LsFiles.stagedDetails [repoPath r] r
+ let (bad, good) = partition ismissing indexcontents
+ return (bad, good, cleanup)
+ where
+ getblob (_file, Just sha, Just _mode) = Just sha
+ getblob _ = Nothing
+ ismissing = maybe False (`S.member` missing) . getblob
+
+{- Rewrites the index file, removing from it any files whose blobs are
+ - missing. Returns the list of affected files. -}
+rewriteIndex :: MissingObjects -> Repo -> IO [FilePath]
+rewriteIndex missing r
+ | repoIsLocalBare r = return []
+ | otherwise = do
+ (bad, good, cleanup) <- partitionIndex missing r
+ unless (null bad) $ do
+ nukeFile (localGitDir r </> "index")
+ UpdateIndex.streamUpdateIndex r
+ =<< (catMaybes <$> mapM reinject good)
+ void cleanup
+ return $ map fst3 bad
+ where
+ reinject (file, Just sha, Just mode) = case toBlobType mode of
+ Nothing -> return Nothing
+ Just blobtype -> Just <$>
+ UpdateIndex.stageFile sha blobtype file r
+ reinject _ = return Nothing
+
+newtype GoodCommits = GoodCommits (S.Set Sha)
+
+emptyGoodCommits :: GoodCommits
+emptyGoodCommits = GoodCommits S.empty
+
+checkGoodCommit :: Sha -> GoodCommits -> Bool
+checkGoodCommit sha (GoodCommits s) = S.member sha s
+
+addGoodCommits :: [Sha] -> GoodCommits -> GoodCommits
+addGoodCommits shas (GoodCommits s) = GoodCommits $
+ S.union s (S.fromList shas)
+
+displayList :: [String] -> String -> IO ()
+displayList items header
+ | null items = return ()
+ | otherwise = do
+ putStrLn header
+ putStr $ unlines $ map (\i -> "\t" ++ i) truncateditems
+ where
+ numitems = length items
+ truncateditems
+ | numitems > 10 = take 10 items ++ ["(and " ++ show (numitems - 10) ++ " more)"]
+ | otherwise = items
+
+{- Put it all together. -}
+runRepair :: Bool -> Repo -> IO (Bool, MissingObjects)
+runRepair forced g = do
+ putStrLn "Running git fsck ..."
+ fsckresult <- findBroken False g
+ missing <- cleanCorruptObjects fsckresult g
+ stillmissing <- retrieveMissingObjects missing g
+ if S.null stillmissing
+ then successfulfinish stillmissing
+ else do
+ putStrLn $ unwords
+ [ show (S.size stillmissing)
+ , "missing objects could not be recovered!"
+ ]
+ if forced
+ then do
+ (remotebranches, goodcommits) <- removeTrackingBranches stillmissing emptyGoodCommits g
+ unless (null remotebranches) $
+ putStrLn $ unwords
+ [ "removed"
+ , show (length remotebranches)
+ , "remote tracking branches that referred to missing objects"
+ ]
+ (resetbranches, deletedbranches, _) <- resetLocalBranches stillmissing goodcommits g
+ displayList (map show resetbranches)
+ "Reset these local branches to old versions before the missing objects were committed:"
+ displayList (map show deletedbranches)
+ "Deleted these local branches, which could not be recovered due to missing objects:"
+ deindexedfiles <- rewriteIndex stillmissing g
+ displayList deindexedfiles
+ "Removed these missing files from the index. You should look at what files are present in your working tree and git add them back to the index when appropriate."
+ if null resetbranches && null deletedbranches
+ then successfulfinish stillmissing
+ else do
+ unless (repoIsLocalBare g) $ do
+ mcurr <- Branch.currentUnsafe g
+ case mcurr of
+ Nothing -> return ()
+ Just curr -> when (any (== curr) (resetbranches ++ deletedbranches)) $ do
+ putStrLn $ unwords
+ [ "You currently have"
+ , show curr
+ , "checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch!"
+ ]
+ putStrLn "Successfully recovered repository!"
+ putStrLn "Please carefully check that the changes mentioned above are ok.."
+ return (True, stillmissing)
+ else do
+ if repoIsLocalBare g
+ then do
+ putStrLn "If you have a clone of this bare repository, you should add it as a remote of this repository, and re-run git-recover-repository."
+ putStrLn "If there are no clones of this repository, you can instead run git-recover-repository with the --force parameter to force recovery to a possibly usable state."
+ else putStrLn "To force a recovery to a usable state, run this command again with the --force parameter."
+ return (False, stillmissing)
+ where
+ successfulfinish stillmissing = do
+ mapM_ putStrLn
+ [ "Successfully recovered repository!"
+ , "You should run \"git fsck\" to make sure, but it looks like"
+ , "everything was recovered ok."
+ ]
+ return (True, stillmissing)