summaryrefslogtreecommitdiff
path: root/Git
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2013-10-20 17:50:51 -0400
committerGravatar Joey Hess <joey@kitenet.net>2013-10-20 17:50:51 -0400
commit66ccc37a99994f1206d308c0c0003ac2605d3c42 (patch)
tree17fd97ca91a7c963cf80ac1274c94ac841e8572c /Git
parent66e2c6e3a16480e25f82ef447cfebac16997aac5 (diff)
git-recover-repository 1/2 done
Diffstat (limited to 'Git')
-rw-r--r--Git/Branch.hs2
-rw-r--r--Git/CatFile.hs34
-rw-r--r--Git/Command.hs10
-rw-r--r--Git/Fsck.hs66
-rw-r--r--Git/HashObject.hs7
-rw-r--r--Git/Objects.hs29
-rw-r--r--Git/RecoverRepository.hs171
7 files changed, 294 insertions, 25 deletions
diff --git a/Git/Branch.hs b/Git/Branch.hs
index fed53d767..01d028f55 100644
--- a/Git/Branch.hs
+++ b/Git/Branch.hs
@@ -96,7 +96,7 @@ commit message branch parentrefs repo = do
pipeReadStrict [Param "write-tree"] repo
sha <- getSha "commit-tree" $ pipeWriteRead
(map Param $ ["commit-tree", show tree] ++ ps)
- message repo
+ (Just $ flip hPutStr message) repo
run [Param "update-ref", Param $ show branch, Param $ show sha] repo
return sha
where
diff --git a/Git/CatFile.hs b/Git/CatFile.hs
index 2565dff94..aee6bd19f 100644
--- a/Git/CatFile.hs
+++ b/Git/CatFile.hs
@@ -8,6 +8,7 @@
module Git.CatFile (
CatFileHandle,
catFileStart,
+ catFileStart',
catFileStop,
catFile,
catTree,
@@ -18,8 +19,7 @@ module Git.CatFile (
import System.IO
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
-import Data.Char
-import System.Process (std_out, std_err)
+import Data.Tuple.Utils
import Numeric
import System.Posix.Types
@@ -30,13 +30,15 @@ import Git.Command
import Git.Types
import Git.FilePath
import qualified Utility.CoProcess as CoProcess
-import Utility.Hash
data CatFileHandle = CatFileHandle CoProcess.CoProcessHandle Repo
catFileStart :: Repo -> IO CatFileHandle
-catFileStart repo = do
- coprocess <- CoProcess.rawMode =<< gitCoProcessStart True
+catFileStart = catFileStart' True
+
+catFileStart' :: Bool -> Repo -> IO CatFileHandle
+catFileStart' restartable repo = do
+ coprocess <- CoProcess.rawMode =<< gitCoProcessStart restartable
[ Param "cat-file"
, Param "--batch"
] repo
@@ -53,11 +55,10 @@ catFile h branch file = catObject h $ Ref $
{- Uses a running git cat-file read the content of an object.
- Objects that do not exist will have "" returned. -}
catObject :: CatFileHandle -> Ref -> IO L.ByteString
-catObject h object = maybe L.empty fst <$> catObjectDetails h object
+catObject h object = maybe L.empty fst3 <$> catObjectDetails h object
-{- Gets both the content of an object, and its Sha. -}
-catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha))
-catObjectDetails (CatFileHandle hdl repo) object = CoProcess.query hdl send receive
+catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha, ObjectType))
+catObjectDetails (CatFileHandle hdl _) object = CoProcess.query hdl send receive
where
query = show object
send to = hPutStrLn to query
@@ -65,19 +66,18 @@ catObjectDetails (CatFileHandle hdl repo) object = CoProcess.query hdl send rece
header <- hGetLine from
case words header of
[sha, objtype, size]
- | length sha == shaSize &&
- isJust (readObjectType objtype) ->
- case reads size of
- [(bytes, "")] -> readcontent bytes from sha
+ | length sha == shaSize ->
+ case (readObjectType objtype, reads size) of
+ (Just t, [(bytes, "")]) -> readcontent t bytes from sha
_ -> dne
| otherwise -> dne
_
| header == show object ++ " missing" -> dne
| otherwise -> error $ "unknown response from git cat-file " ++ show (header, object)
- readcontent bytes from sha = do
+ readcontent objtype bytes from sha = do
content <- S.hGet from bytes
eatchar '\n' from
- return $ Just (L.fromChunks [content], Ref sha)
+ return $ Just (L.fromChunks [content], Ref sha, objtype)
dne = return Nothing
eatchar expected from = do
c <- hGetChar from
@@ -88,8 +88,8 @@ catObjectDetails (CatFileHandle hdl repo) object = CoProcess.query hdl send rece
catTree :: CatFileHandle -> Ref -> IO [(FilePath, FileMode)]
catTree h treeref = go <$> catObjectDetails h treeref
where
- go Nothing = []
- go (Just (b, _)) = parsetree [] b
+ go (Just (b, _, TreeObject)) = parsetree [] b
+ go _ = []
parsetree c b = case L.break (== 0) b of
(modefile, rest)
diff --git a/Git/Command.hs b/Git/Command.hs
index 9d05e0d17..8b027d2c3 100644
--- a/Git/Command.hs
+++ b/Git/Command.hs
@@ -1,6 +1,6 @@
{- running git commands
-
- - Copyright 2010-2012 Joey Hess <joey@kitenet.net>
+ - Copyright 2010-2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -85,13 +85,13 @@ pipeReadStrict params repo = assertLocal repo $
where
p = gitCreateProcess params repo
-{- Runs a git command, feeding it input, and returning its output,
+{- Runs a git command, feeding it an input, and returning its output,
- which is expected to be fairly small, since it's all read into memory
- strictly. -}
-pipeWriteRead :: [CommandParam] -> String -> Repo -> IO String
-pipeWriteRead params s repo = assertLocal repo $
+pipeWriteRead :: [CommandParam] -> Maybe (Handle -> IO ()) -> Repo -> IO String
+pipeWriteRead params writer repo = assertLocal repo $
writeReadProcessEnv "git" (toCommand $ gitCommandLine params repo)
- (gitEnv repo) s (Just adjusthandle)
+ (gitEnv repo) writer (Just adjusthandle)
where
adjusthandle h = do
fileEncoding h
diff --git a/Git/Fsck.hs b/Git/Fsck.hs
new file mode 100644
index 000000000..a43a84f3e
--- /dev/null
+++ b/Git/Fsck.hs
@@ -0,0 +1,66 @@
+{- git fsck interface
+ -
+ - Copyright 2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Git.Fsck (
+ findBroken,
+ findMissing
+) where
+
+import Common
+import Git
+import Git.Command
+import Git.Sha
+import Git.CatFile
+
+import qualified Data.Set as S
+
+{- Runs fsck to find some of the broken objects in the repository.
+ - May not find all broken objects, if fsck fails on bad data in some of
+ - the broken objects it does find. If the fsck fails generally without
+ - finding any broken objects, returns Nothing.
+ -
+ - Strategy: Rather than parsing fsck's current specific output,
+ - look for anything in its output (both stdout and stderr) that appears
+ - to be a git sha. Not all such shas are of broken objects, so ask git
+ - to try to cat the object, and see if it fails.
+ -}
+findBroken :: Repo -> IO (Maybe (S.Set Sha))
+findBroken r = do
+ (output, fsckok) <- processTranscript "git" (toCommand $ fsckParams r) Nothing
+ let objs = parseFsckOutput output
+ badobjs <- findMissing objs r
+ if S.null badobjs && not fsckok
+ then return Nothing
+ else return $ Just badobjs
+
+{- Finds objects that are missing from the git repsitory, or are corrupt.
+ - Note that catting a corrupt object will cause cat-file to crash. -}
+findMissing :: [Sha] -> Repo -> IO (S.Set Sha)
+findMissing objs r = go objs [] =<< start
+ where
+ start = catFileStart' False r
+ go [] c h = do
+ catFileStop h
+ return $ S.fromList c
+ go (o:os) c h = do
+ v <- tryIO $ isNothing <$> catObjectDetails h o
+ case v of
+ Left _ -> do
+ void $ tryIO $ catFileStop h
+ go os (o:c) =<< start
+ Right True -> go os (o:c) h
+ Right False -> go os c h
+
+parseFsckOutput :: String -> [Sha]
+parseFsckOutput = catMaybes . map extractSha . concat . map words . lines
+
+fsckParams :: Repo -> [CommandParam]
+fsckParams = gitCommandLine
+ [ Param "fsck"
+ , Param "--no-dangling"
+ , Param "--no-reflogs"
+ ]
diff --git a/Git/HashObject.hs b/Git/HashObject.hs
index c6e1d2349..bb9b20d96 100644
--- a/Git/HashObject.hs
+++ b/Git/HashObject.hs
@@ -36,8 +36,11 @@ hashFile h file = CoProcess.query h send receive
{- Injects some content into git, returning its Sha. -}
hashObject :: ObjectType -> String -> Repo -> IO Sha
-hashObject objtype content repo = getSha subcmd $
- pipeWriteRead (map Param params) content repo
+hashObject objtype content = hashObject' objtype (flip hPutStr content)
+
+hashObject' :: ObjectType -> (Handle -> IO ()) -> Repo -> IO Sha
+hashObject' objtype writer repo = getSha subcmd $
+ pipeWriteRead (map Param params) (Just writer) repo
where
subcmd = "hash-object"
params = [subcmd, "-t", show objtype, "-w", "--stdin", "--no-filters"]
diff --git a/Git/Objects.hs b/Git/Objects.hs
new file mode 100644
index 000000000..b1c580533
--- /dev/null
+++ b/Git/Objects.hs
@@ -0,0 +1,29 @@
+{- .git/objects
+ -
+ - Copyright 2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Git.Objects where
+
+import Common
+import Git
+
+objectsDir :: Repo -> FilePath
+objectsDir r = localGitDir r </> "objects"
+
+packDir :: Repo -> FilePath
+packDir r = objectsDir r </> "pack"
+
+listPackFiles :: Repo -> IO [FilePath]
+listPackFiles r = filter (".pack" `isSuffixOf`)
+ <$> catchDefaultIO [] (dirContents $ packDir r)
+
+packIdxFile :: FilePath -> FilePath
+packIdxFile = flip replaceExtension "idx"
+
+looseObjectFile :: Repo -> Sha -> FilePath
+looseObjectFile r sha = objectsDir r </> prefix </> rest
+ where
+ (prefix, rest) = splitAt 2 (show sha)
diff --git a/Git/RecoverRepository.hs b/Git/RecoverRepository.hs
new file mode 100644
index 000000000..53fbf0ce7
--- /dev/null
+++ b/Git/RecoverRepository.hs
@@ -0,0 +1,171 @@
+{- git repository recovery
+ -
+ - Copyright 2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Git.RecoverRepository (
+ cleanCorruptObjects,
+ retrieveMissingObjects,
+ resetLocalBranches,
+ removeTrackingBranches,
+) where
+
+import Common
+import Git
+import Git.Command
+import Git.Fsck
+import Git.Objects
+import Git.HashObject
+import Git.Types
+import qualified Git.Config
+import qualified Git.Construct
+import Utility.Tmp
+import Utility.Monad
+import Utility.Rsync
+
+import qualified Data.Set as S
+import qualified Data.ByteString.Lazy as L
+import System.Log.Logger
+
+{- Finds and removes corrupt objects from the repository, returning a list
+ - of all such objects, which need to be found elsewhere to finish
+ - recovery.
+ -
+ - Strategy: Run git fsck, remove objects it identifies as corrupt,
+ - and repeat until git fsck finds no new objects.
+ -
+ - To remove corrupt objects, unpack all packs, and remove the packs
+ - (to handle corrupt packs), and remove loose object files.
+ -}
+cleanCorruptObjects :: Repo -> IO (S.Set Sha)
+cleanCorruptObjects r = do
+ notice "Running git fsck ..."
+ check =<< findBroken r
+ where
+ check Nothing = do
+ notice "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file? Unpacking all pack files."
+ explodePacks r
+ retry S.empty
+ check (Just bad)
+ | S.null bad = return S.empty
+ | otherwise = do
+ notice $ unwords
+ [ "git fsck found"
+ , show (S.size bad)
+ , "broken objects. Unpacking all pack files."
+ ]
+ explodePacks r
+ removeLoose r bad
+ retry bad
+ retry oldbad = do
+ notice "Re-running git fsck to see if it finds more problems."
+ v <- findBroken r
+ case v of
+ Nothing -> error $ unwords
+ [ "git fsck found a problem, which was not corrected after removing"
+ , show (S.size oldbad)
+ , "corrupt objects."
+ ]
+ Just newbad -> do
+ removeLoose r newbad
+ let s = S.union oldbad newbad
+ if s == oldbad
+ then return s
+ else retry s
+
+removeLoose :: Repo -> S.Set Sha -> IO ()
+removeLoose r s = do
+ let fs = map (looseObjectFile r) (S.toList s)
+ count <- length <$> filterM doesFileExist fs
+ when (count > 0) $ do
+ notice $ unwords
+ [ "removing"
+ , show count
+ , "corrupt loose objects"
+ ]
+ mapM_ nukeFile fs
+
+explodePacks :: Repo -> IO ()
+explodePacks r = mapM_ go =<< listPackFiles r
+ where
+ go packfile = do
+ -- May fail, if pack file is corrupt.
+ void $ tryIO $
+ pipeWrite [Param "unpack-objects"] r $ \h ->
+ L.hPut h =<< L.readFile packfile
+ nukeFile packfile
+ nukeFile $ packIdxFile packfile
+
+{- Try to retrieve a set of missing objects, from the remotes of a
+ - repository. Returns any that could not be retreived.
+ -}
+retrieveMissingObjects :: S.Set Sha -> Repo -> IO (S.Set Sha)
+retrieveMissingObjects missing r
+ | S.null missing = return missing
+ | otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
+ unlessM (boolSystem "git" [Params "init", File tmpdir]) $
+ error $ "failed to create temp repository in " ++ tmpdir
+ tmpr <- Git.Config.read =<< Git.Construct.fromAbsPath tmpdir
+ stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing
+ if S.null stillmissing
+ then return stillmissing
+ else pullremotes tmpr (remotes r) fetchallrefs stillmissing
+ where
+ pullremotes tmpr [] _ stillmissing = return stillmissing
+ pullremotes tmpr (rmt:rmts) fetchrefs s
+ | S.null s = return s
+ | otherwise = do
+ notice $ "Trying to recover missing objects from remote " ++ repoDescribe rmt
+ ifM (fetchsome rmt fetchrefs tmpr)
+ ( do
+ void $ copyObjects tmpr r
+ stillmissing <- findMissing (S.toList s) r
+ pullremotes tmpr rmts fetchrefs stillmissing
+ , do
+ notice $ unwords
+ [ "failed to fetch from remote"
+ , repoDescribe rmt
+ , "(will continue without it, but making this remote available may improve recovery)"
+ ]
+ pullremotes tmpr rmts fetchrefs s
+ )
+ fetchsome rmt ps = runBool $
+ [ Param "fetch"
+ , Param (repoLocation rmt)
+ , Params "--force --update-head-ok --quiet"
+ ] ++ ps
+ -- fetch refs and tags
+ fetchrefstags = [ Param "+refs/heads/*:refs/heads/*", Param "--tags"]
+ -- Fetch all available refs (more likely to fail,
+ -- as the remote may have refs it refuses to send).
+ fetchallrefs = [ Param "+*:*" ]
+
+{- Copies all objects from the src repository to the dest repository.
+ - This is done using rsync, so it copies all missing object, and all
+ - objects they rely on. -}
+copyObjects :: Repo -> Repo -> IO Bool
+copyObjects srcr destr = rsync
+ [ Param "-qr"
+ , File $ addTrailingPathSeparator $ objectsDir srcr
+ , File $ addTrailingPathSeparator $ objectsDir destr
+ ]
+
+{- To deal with missing objects that cannot be recovered, resets any
+ - local branches to point to an old commit before the missing
+ - objects.
+ -}
+resetLocalBranches :: S.Set Sha -> Repo -> IO [Branch]
+resetLocalBranches missing r = do
+ error "TODO"
+
+{- To deal with missing objects that cannot be recovered, removes
+ - any remote tracking branches that reference them.
+ -}
+removeTrackingBranches :: S.Set Sha -> Repo -> IO [Branch]
+removeTrackingBranches missing r = do
+ error "TODO"
+
+notice :: String -> IO ()
+notice = noticeM "RecoverRepository"