diff options
author | Joey Hess <joey@kitenet.net> | 2013-10-20 17:50:51 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2013-10-20 17:50:51 -0400 |
commit | 66ccc37a99994f1206d308c0c0003ac2605d3c42 (patch) | |
tree | 17fd97ca91a7c963cf80ac1274c94ac841e8572c /Git | |
parent | 66e2c6e3a16480e25f82ef447cfebac16997aac5 (diff) |
git-recover-repository 1/2 done
Diffstat (limited to 'Git')
-rw-r--r-- | Git/Branch.hs | 2 | ||||
-rw-r--r-- | Git/CatFile.hs | 34 | ||||
-rw-r--r-- | Git/Command.hs | 10 | ||||
-rw-r--r-- | Git/Fsck.hs | 66 | ||||
-rw-r--r-- | Git/HashObject.hs | 7 | ||||
-rw-r--r-- | Git/Objects.hs | 29 | ||||
-rw-r--r-- | Git/RecoverRepository.hs | 171 |
7 files changed, 294 insertions, 25 deletions
diff --git a/Git/Branch.hs b/Git/Branch.hs index fed53d767..01d028f55 100644 --- a/Git/Branch.hs +++ b/Git/Branch.hs @@ -96,7 +96,7 @@ commit message branch parentrefs repo = do pipeReadStrict [Param "write-tree"] repo sha <- getSha "commit-tree" $ pipeWriteRead (map Param $ ["commit-tree", show tree] ++ ps) - message repo + (Just $ flip hPutStr message) repo run [Param "update-ref", Param $ show branch, Param $ show sha] repo return sha where diff --git a/Git/CatFile.hs b/Git/CatFile.hs index 2565dff94..aee6bd19f 100644 --- a/Git/CatFile.hs +++ b/Git/CatFile.hs @@ -8,6 +8,7 @@ module Git.CatFile ( CatFileHandle, catFileStart, + catFileStart', catFileStop, catFile, catTree, @@ -18,8 +19,7 @@ module Git.CatFile ( import System.IO import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L -import Data.Char -import System.Process (std_out, std_err) +import Data.Tuple.Utils import Numeric import System.Posix.Types @@ -30,13 +30,15 @@ import Git.Command import Git.Types import Git.FilePath import qualified Utility.CoProcess as CoProcess -import Utility.Hash data CatFileHandle = CatFileHandle CoProcess.CoProcessHandle Repo catFileStart :: Repo -> IO CatFileHandle -catFileStart repo = do - coprocess <- CoProcess.rawMode =<< gitCoProcessStart True +catFileStart = catFileStart' True + +catFileStart' :: Bool -> Repo -> IO CatFileHandle +catFileStart' restartable repo = do + coprocess <- CoProcess.rawMode =<< gitCoProcessStart restartable [ Param "cat-file" , Param "--batch" ] repo @@ -53,11 +55,10 @@ catFile h branch file = catObject h $ Ref $ {- Uses a running git cat-file read the content of an object. - Objects that do not exist will have "" returned. -} catObject :: CatFileHandle -> Ref -> IO L.ByteString -catObject h object = maybe L.empty fst <$> catObjectDetails h object +catObject h object = maybe L.empty fst3 <$> catObjectDetails h object -{- Gets both the content of an object, and its Sha. -} -catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha)) -catObjectDetails (CatFileHandle hdl repo) object = CoProcess.query hdl send receive +catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha, ObjectType)) +catObjectDetails (CatFileHandle hdl _) object = CoProcess.query hdl send receive where query = show object send to = hPutStrLn to query @@ -65,19 +66,18 @@ catObjectDetails (CatFileHandle hdl repo) object = CoProcess.query hdl send rece header <- hGetLine from case words header of [sha, objtype, size] - | length sha == shaSize && - isJust (readObjectType objtype) -> - case reads size of - [(bytes, "")] -> readcontent bytes from sha + | length sha == shaSize -> + case (readObjectType objtype, reads size) of + (Just t, [(bytes, "")]) -> readcontent t bytes from sha _ -> dne | otherwise -> dne _ | header == show object ++ " missing" -> dne | otherwise -> error $ "unknown response from git cat-file " ++ show (header, object) - readcontent bytes from sha = do + readcontent objtype bytes from sha = do content <- S.hGet from bytes eatchar '\n' from - return $ Just (L.fromChunks [content], Ref sha) + return $ Just (L.fromChunks [content], Ref sha, objtype) dne = return Nothing eatchar expected from = do c <- hGetChar from @@ -88,8 +88,8 @@ catObjectDetails (CatFileHandle hdl repo) object = CoProcess.query hdl send rece catTree :: CatFileHandle -> Ref -> IO [(FilePath, FileMode)] catTree h treeref = go <$> catObjectDetails h treeref where - go Nothing = [] - go (Just (b, _)) = parsetree [] b + go (Just (b, _, TreeObject)) = parsetree [] b + go _ = [] parsetree c b = case L.break (== 0) b of (modefile, rest) diff --git a/Git/Command.hs b/Git/Command.hs index 9d05e0d17..8b027d2c3 100644 --- a/Git/Command.hs +++ b/Git/Command.hs @@ -1,6 +1,6 @@ {- running git commands - - - Copyright 2010-2012 Joey Hess <joey@kitenet.net> + - Copyright 2010-2013 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} @@ -85,13 +85,13 @@ pipeReadStrict params repo = assertLocal repo $ where p = gitCreateProcess params repo -{- Runs a git command, feeding it input, and returning its output, +{- Runs a git command, feeding it an input, and returning its output, - which is expected to be fairly small, since it's all read into memory - strictly. -} -pipeWriteRead :: [CommandParam] -> String -> Repo -> IO String -pipeWriteRead params s repo = assertLocal repo $ +pipeWriteRead :: [CommandParam] -> Maybe (Handle -> IO ()) -> Repo -> IO String +pipeWriteRead params writer repo = assertLocal repo $ writeReadProcessEnv "git" (toCommand $ gitCommandLine params repo) - (gitEnv repo) s (Just adjusthandle) + (gitEnv repo) writer (Just adjusthandle) where adjusthandle h = do fileEncoding h diff --git a/Git/Fsck.hs b/Git/Fsck.hs new file mode 100644 index 000000000..a43a84f3e --- /dev/null +++ b/Git/Fsck.hs @@ -0,0 +1,66 @@ +{- git fsck interface + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.Fsck ( + findBroken, + findMissing +) where + +import Common +import Git +import Git.Command +import Git.Sha +import Git.CatFile + +import qualified Data.Set as S + +{- Runs fsck to find some of the broken objects in the repository. + - May not find all broken objects, if fsck fails on bad data in some of + - the broken objects it does find. If the fsck fails generally without + - finding any broken objects, returns Nothing. + - + - Strategy: Rather than parsing fsck's current specific output, + - look for anything in its output (both stdout and stderr) that appears + - to be a git sha. Not all such shas are of broken objects, so ask git + - to try to cat the object, and see if it fails. + -} +findBroken :: Repo -> IO (Maybe (S.Set Sha)) +findBroken r = do + (output, fsckok) <- processTranscript "git" (toCommand $ fsckParams r) Nothing + let objs = parseFsckOutput output + badobjs <- findMissing objs r + if S.null badobjs && not fsckok + then return Nothing + else return $ Just badobjs + +{- Finds objects that are missing from the git repsitory, or are corrupt. + - Note that catting a corrupt object will cause cat-file to crash. -} +findMissing :: [Sha] -> Repo -> IO (S.Set Sha) +findMissing objs r = go objs [] =<< start + where + start = catFileStart' False r + go [] c h = do + catFileStop h + return $ S.fromList c + go (o:os) c h = do + v <- tryIO $ isNothing <$> catObjectDetails h o + case v of + Left _ -> do + void $ tryIO $ catFileStop h + go os (o:c) =<< start + Right True -> go os (o:c) h + Right False -> go os c h + +parseFsckOutput :: String -> [Sha] +parseFsckOutput = catMaybes . map extractSha . concat . map words . lines + +fsckParams :: Repo -> [CommandParam] +fsckParams = gitCommandLine + [ Param "fsck" + , Param "--no-dangling" + , Param "--no-reflogs" + ] diff --git a/Git/HashObject.hs b/Git/HashObject.hs index c6e1d2349..bb9b20d96 100644 --- a/Git/HashObject.hs +++ b/Git/HashObject.hs @@ -36,8 +36,11 @@ hashFile h file = CoProcess.query h send receive {- Injects some content into git, returning its Sha. -} hashObject :: ObjectType -> String -> Repo -> IO Sha -hashObject objtype content repo = getSha subcmd $ - pipeWriteRead (map Param params) content repo +hashObject objtype content = hashObject' objtype (flip hPutStr content) + +hashObject' :: ObjectType -> (Handle -> IO ()) -> Repo -> IO Sha +hashObject' objtype writer repo = getSha subcmd $ + pipeWriteRead (map Param params) (Just writer) repo where subcmd = "hash-object" params = [subcmd, "-t", show objtype, "-w", "--stdin", "--no-filters"] diff --git a/Git/Objects.hs b/Git/Objects.hs new file mode 100644 index 000000000..b1c580533 --- /dev/null +++ b/Git/Objects.hs @@ -0,0 +1,29 @@ +{- .git/objects + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.Objects where + +import Common +import Git + +objectsDir :: Repo -> FilePath +objectsDir r = localGitDir r </> "objects" + +packDir :: Repo -> FilePath +packDir r = objectsDir r </> "pack" + +listPackFiles :: Repo -> IO [FilePath] +listPackFiles r = filter (".pack" `isSuffixOf`) + <$> catchDefaultIO [] (dirContents $ packDir r) + +packIdxFile :: FilePath -> FilePath +packIdxFile = flip replaceExtension "idx" + +looseObjectFile :: Repo -> Sha -> FilePath +looseObjectFile r sha = objectsDir r </> prefix </> rest + where + (prefix, rest) = splitAt 2 (show sha) diff --git a/Git/RecoverRepository.hs b/Git/RecoverRepository.hs new file mode 100644 index 000000000..53fbf0ce7 --- /dev/null +++ b/Git/RecoverRepository.hs @@ -0,0 +1,171 @@ +{- git repository recovery + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Git.RecoverRepository ( + cleanCorruptObjects, + retrieveMissingObjects, + resetLocalBranches, + removeTrackingBranches, +) where + +import Common +import Git +import Git.Command +import Git.Fsck +import Git.Objects +import Git.HashObject +import Git.Types +import qualified Git.Config +import qualified Git.Construct +import Utility.Tmp +import Utility.Monad +import Utility.Rsync + +import qualified Data.Set as S +import qualified Data.ByteString.Lazy as L +import System.Log.Logger + +{- Finds and removes corrupt objects from the repository, returning a list + - of all such objects, which need to be found elsewhere to finish + - recovery. + - + - Strategy: Run git fsck, remove objects it identifies as corrupt, + - and repeat until git fsck finds no new objects. + - + - To remove corrupt objects, unpack all packs, and remove the packs + - (to handle corrupt packs), and remove loose object files. + -} +cleanCorruptObjects :: Repo -> IO (S.Set Sha) +cleanCorruptObjects r = do + notice "Running git fsck ..." + check =<< findBroken r + where + check Nothing = do + notice "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file? Unpacking all pack files." + explodePacks r + retry S.empty + check (Just bad) + | S.null bad = return S.empty + | otherwise = do + notice $ unwords + [ "git fsck found" + , show (S.size bad) + , "broken objects. Unpacking all pack files." + ] + explodePacks r + removeLoose r bad + retry bad + retry oldbad = do + notice "Re-running git fsck to see if it finds more problems." + v <- findBroken r + case v of + Nothing -> error $ unwords + [ "git fsck found a problem, which was not corrected after removing" + , show (S.size oldbad) + , "corrupt objects." + ] + Just newbad -> do + removeLoose r newbad + let s = S.union oldbad newbad + if s == oldbad + then return s + else retry s + +removeLoose :: Repo -> S.Set Sha -> IO () +removeLoose r s = do + let fs = map (looseObjectFile r) (S.toList s) + count <- length <$> filterM doesFileExist fs + when (count > 0) $ do + notice $ unwords + [ "removing" + , show count + , "corrupt loose objects" + ] + mapM_ nukeFile fs + +explodePacks :: Repo -> IO () +explodePacks r = mapM_ go =<< listPackFiles r + where + go packfile = do + -- May fail, if pack file is corrupt. + void $ tryIO $ + pipeWrite [Param "unpack-objects"] r $ \h -> + L.hPut h =<< L.readFile packfile + nukeFile packfile + nukeFile $ packIdxFile packfile + +{- Try to retrieve a set of missing objects, from the remotes of a + - repository. Returns any that could not be retreived. + -} +retrieveMissingObjects :: S.Set Sha -> Repo -> IO (S.Set Sha) +retrieveMissingObjects missing r + | S.null missing = return missing + | otherwise = withTmpDir "tmprepo" $ \tmpdir -> do + unlessM (boolSystem "git" [Params "init", File tmpdir]) $ + error $ "failed to create temp repository in " ++ tmpdir + tmpr <- Git.Config.read =<< Git.Construct.fromAbsPath tmpdir + stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing + if S.null stillmissing + then return stillmissing + else pullremotes tmpr (remotes r) fetchallrefs stillmissing + where + pullremotes tmpr [] _ stillmissing = return stillmissing + pullremotes tmpr (rmt:rmts) fetchrefs s + | S.null s = return s + | otherwise = do + notice $ "Trying to recover missing objects from remote " ++ repoDescribe rmt + ifM (fetchsome rmt fetchrefs tmpr) + ( do + void $ copyObjects tmpr r + stillmissing <- findMissing (S.toList s) r + pullremotes tmpr rmts fetchrefs stillmissing + , do + notice $ unwords + [ "failed to fetch from remote" + , repoDescribe rmt + , "(will continue without it, but making this remote available may improve recovery)" + ] + pullremotes tmpr rmts fetchrefs s + ) + fetchsome rmt ps = runBool $ + [ Param "fetch" + , Param (repoLocation rmt) + , Params "--force --update-head-ok --quiet" + ] ++ ps + -- fetch refs and tags + fetchrefstags = [ Param "+refs/heads/*:refs/heads/*", Param "--tags"] + -- Fetch all available refs (more likely to fail, + -- as the remote may have refs it refuses to send). + fetchallrefs = [ Param "+*:*" ] + +{- Copies all objects from the src repository to the dest repository. + - This is done using rsync, so it copies all missing object, and all + - objects they rely on. -} +copyObjects :: Repo -> Repo -> IO Bool +copyObjects srcr destr = rsync + [ Param "-qr" + , File $ addTrailingPathSeparator $ objectsDir srcr + , File $ addTrailingPathSeparator $ objectsDir destr + ] + +{- To deal with missing objects that cannot be recovered, resets any + - local branches to point to an old commit before the missing + - objects. + -} +resetLocalBranches :: S.Set Sha -> Repo -> IO [Branch] +resetLocalBranches missing r = do + error "TODO" + +{- To deal with missing objects that cannot be recovered, removes + - any remote tracking branches that reference them. + -} +removeTrackingBranches :: S.Set Sha -> Repo -> IO [Branch] +removeTrackingBranches missing r = do + error "TODO" + +notice :: String -> IO () +notice = noticeM "RecoverRepository" |