summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2013-10-20 17:50:51 -0400
committerGravatar Joey Hess <joey@kitenet.net>2013-10-20 17:50:51 -0400
commit66ccc37a99994f1206d308c0c0003ac2605d3c42 (patch)
tree17fd97ca91a7c963cf80ac1274c94ac841e8572c
parent66e2c6e3a16480e25f82ef447cfebac16997aac5 (diff)
git-recover-repository 1/2 done
-rw-r--r--.gitignore11
-rw-r--r--Annex/CatFile.hs2
-rw-r--r--Assistant/Threads/Watcher.hs2
-rw-r--r--Git/Branch.hs2
-rw-r--r--Git/CatFile.hs34
-rw-r--r--Git/Command.hs10
-rw-r--r--Git/Fsck.hs66
-rw-r--r--Git/HashObject.hs7
-rw-r--r--Git/Objects.hs29
-rw-r--r--Git/RecoverRepository.hs171
-rw-r--r--Makefile12
-rw-r--r--Utility/Process.hs8
-rw-r--r--debian/changelog2
-rw-r--r--doc/design/assistant/disaster_recovery.mdwn17
-rw-r--r--doc/git-recover-repository.mdwn28
-rw-r--r--git-recover-repository.hs73
16 files changed, 431 insertions, 43 deletions
diff --git a/.gitignore b/.gitignore
index 717b58abc..221087847 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
+tags
+Setup
+*.hi
+*.o
tmp
test
build-stamp
@@ -9,7 +13,10 @@ Build/OSXMkLibs
git-annex
git-annex.1
git-annex-shell.1
+git-union-merge
git-union-merge.1
+git-recover-repository
+git-recover-repository.1
doc/.ikiwiki
html
*.tix
@@ -22,7 +29,3 @@ cabal-dev
# OSX related
.DS_Store
.virthualenv
-tags
-Setup
-*.hi
-*.o
diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs
index fa73d8282..407b4ddae 100644
--- a/Annex/CatFile.hs
+++ b/Annex/CatFile.hs
@@ -43,7 +43,7 @@ catTree ref = do
h <- catFileHandle
liftIO $ Git.CatFile.catTree h ref
-catObjectDetails :: Git.Ref -> Annex (Maybe (L.ByteString, Sha))
+catObjectDetails :: Git.Ref -> Annex (Maybe (L.ByteString, Sha, ObjectType))
catObjectDetails ref = do
h <- catFileHandle
liftIO $ Git.CatFile.catObjectDetails h ref
diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs
index c10d03eeb..a44664639 100644
--- a/Assistant/Threads/Watcher.hs
+++ b/Assistant/Threads/Watcher.hs
@@ -300,7 +300,7 @@ addLink file link mk = do
liftAnnex $ do
v <- catObjectDetails $ Ref $ ':':file
case v of
- Just (currlink, sha)
+ Just (currlink, sha, _type)
| s2w8 link == L.unpack currlink ->
stageSymlink file sha
_ -> stageSymlink file =<< hashSymlink link
diff --git a/Git/Branch.hs b/Git/Branch.hs
index fed53d767..01d028f55 100644
--- a/Git/Branch.hs
+++ b/Git/Branch.hs
@@ -96,7 +96,7 @@ commit message branch parentrefs repo = do
pipeReadStrict [Param "write-tree"] repo
sha <- getSha "commit-tree" $ pipeWriteRead
(map Param $ ["commit-tree", show tree] ++ ps)
- message repo
+ (Just $ flip hPutStr message) repo
run [Param "update-ref", Param $ show branch, Param $ show sha] repo
return sha
where
diff --git a/Git/CatFile.hs b/Git/CatFile.hs
index 2565dff94..aee6bd19f 100644
--- a/Git/CatFile.hs
+++ b/Git/CatFile.hs
@@ -8,6 +8,7 @@
module Git.CatFile (
CatFileHandle,
catFileStart,
+ catFileStart',
catFileStop,
catFile,
catTree,
@@ -18,8 +19,7 @@ module Git.CatFile (
import System.IO
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
-import Data.Char
-import System.Process (std_out, std_err)
+import Data.Tuple.Utils
import Numeric
import System.Posix.Types
@@ -30,13 +30,15 @@ import Git.Command
import Git.Types
import Git.FilePath
import qualified Utility.CoProcess as CoProcess
-import Utility.Hash
data CatFileHandle = CatFileHandle CoProcess.CoProcessHandle Repo
catFileStart :: Repo -> IO CatFileHandle
-catFileStart repo = do
- coprocess <- CoProcess.rawMode =<< gitCoProcessStart True
+catFileStart = catFileStart' True
+
+catFileStart' :: Bool -> Repo -> IO CatFileHandle
+catFileStart' restartable repo = do
+ coprocess <- CoProcess.rawMode =<< gitCoProcessStart restartable
[ Param "cat-file"
, Param "--batch"
] repo
@@ -53,11 +55,10 @@ catFile h branch file = catObject h $ Ref $
{- Uses a running git cat-file read the content of an object.
- Objects that do not exist will have "" returned. -}
catObject :: CatFileHandle -> Ref -> IO L.ByteString
-catObject h object = maybe L.empty fst <$> catObjectDetails h object
+catObject h object = maybe L.empty fst3 <$> catObjectDetails h object
-{- Gets both the content of an object, and its Sha. -}
-catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha))
-catObjectDetails (CatFileHandle hdl repo) object = CoProcess.query hdl send receive
+catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha, ObjectType))
+catObjectDetails (CatFileHandle hdl _) object = CoProcess.query hdl send receive
where
query = show object
send to = hPutStrLn to query
@@ -65,19 +66,18 @@ catObjectDetails (CatFileHandle hdl repo) object = CoProcess.query hdl send rece
header <- hGetLine from
case words header of
[sha, objtype, size]
- | length sha == shaSize &&
- isJust (readObjectType objtype) ->
- case reads size of
- [(bytes, "")] -> readcontent bytes from sha
+ | length sha == shaSize ->
+ case (readObjectType objtype, reads size) of
+ (Just t, [(bytes, "")]) -> readcontent t bytes from sha
_ -> dne
| otherwise -> dne
_
| header == show object ++ " missing" -> dne
| otherwise -> error $ "unknown response from git cat-file " ++ show (header, object)
- readcontent bytes from sha = do
+ readcontent objtype bytes from sha = do
content <- S.hGet from bytes
eatchar '\n' from
- return $ Just (L.fromChunks [content], Ref sha)
+ return $ Just (L.fromChunks [content], Ref sha, objtype)
dne = return Nothing
eatchar expected from = do
c <- hGetChar from
@@ -88,8 +88,8 @@ catObjectDetails (CatFileHandle hdl repo) object = CoProcess.query hdl send rece
catTree :: CatFileHandle -> Ref -> IO [(FilePath, FileMode)]
catTree h treeref = go <$> catObjectDetails h treeref
where
- go Nothing = []
- go (Just (b, _)) = parsetree [] b
+ go (Just (b, _, TreeObject)) = parsetree [] b
+ go _ = []
parsetree c b = case L.break (== 0) b of
(modefile, rest)
diff --git a/Git/Command.hs b/Git/Command.hs
index 9d05e0d17..8b027d2c3 100644
--- a/Git/Command.hs
+++ b/Git/Command.hs
@@ -1,6 +1,6 @@
{- running git commands
-
- - Copyright 2010-2012 Joey Hess <joey@kitenet.net>
+ - Copyright 2010-2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -85,13 +85,13 @@ pipeReadStrict params repo = assertLocal repo $
where
p = gitCreateProcess params repo
-{- Runs a git command, feeding it input, and returning its output,
+{- Runs a git command, feeding it an input, and returning its output,
- which is expected to be fairly small, since it's all read into memory
- strictly. -}
-pipeWriteRead :: [CommandParam] -> String -> Repo -> IO String
-pipeWriteRead params s repo = assertLocal repo $
+pipeWriteRead :: [CommandParam] -> Maybe (Handle -> IO ()) -> Repo -> IO String
+pipeWriteRead params writer repo = assertLocal repo $
writeReadProcessEnv "git" (toCommand $ gitCommandLine params repo)
- (gitEnv repo) s (Just adjusthandle)
+ (gitEnv repo) writer (Just adjusthandle)
where
adjusthandle h = do
fileEncoding h
diff --git a/Git/Fsck.hs b/Git/Fsck.hs
new file mode 100644
index 000000000..a43a84f3e
--- /dev/null
+++ b/Git/Fsck.hs
@@ -0,0 +1,66 @@
+{- git fsck interface
+ -
+ - Copyright 2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Git.Fsck (
+ findBroken,
+ findMissing
+) where
+
+import Common
+import Git
+import Git.Command
+import Git.Sha
+import Git.CatFile
+
+import qualified Data.Set as S
+
+{- Runs fsck to find some of the broken objects in the repository.
+ - May not find all broken objects, if fsck fails on bad data in some of
+ - the broken objects it does find. If the fsck fails generally without
+ - finding any broken objects, returns Nothing.
+ -
+ - Strategy: Rather than parsing fsck's current specific output,
+ - look for anything in its output (both stdout and stderr) that appears
+ - to be a git sha. Not all such shas are of broken objects, so ask git
+ - to try to cat the object, and see if it fails.
+ -}
+findBroken :: Repo -> IO (Maybe (S.Set Sha))
+findBroken r = do
+ (output, fsckok) <- processTranscript "git" (toCommand $ fsckParams r) Nothing
+ let objs = parseFsckOutput output
+ badobjs <- findMissing objs r
+ if S.null badobjs && not fsckok
+ then return Nothing
+ else return $ Just badobjs
+
+{- Finds objects that are missing from the git repsitory, or are corrupt.
+ - Note that catting a corrupt object will cause cat-file to crash. -}
+findMissing :: [Sha] -> Repo -> IO (S.Set Sha)
+findMissing objs r = go objs [] =<< start
+ where
+ start = catFileStart' False r
+ go [] c h = do
+ catFileStop h
+ return $ S.fromList c
+ go (o:os) c h = do
+ v <- tryIO $ isNothing <$> catObjectDetails h o
+ case v of
+ Left _ -> do
+ void $ tryIO $ catFileStop h
+ go os (o:c) =<< start
+ Right True -> go os (o:c) h
+ Right False -> go os c h
+
+parseFsckOutput :: String -> [Sha]
+parseFsckOutput = catMaybes . map extractSha . concat . map words . lines
+
+fsckParams :: Repo -> [CommandParam]
+fsckParams = gitCommandLine
+ [ Param "fsck"
+ , Param "--no-dangling"
+ , Param "--no-reflogs"
+ ]
diff --git a/Git/HashObject.hs b/Git/HashObject.hs
index c6e1d2349..bb9b20d96 100644
--- a/Git/HashObject.hs
+++ b/Git/HashObject.hs
@@ -36,8 +36,11 @@ hashFile h file = CoProcess.query h send receive
{- Injects some content into git, returning its Sha. -}
hashObject :: ObjectType -> String -> Repo -> IO Sha
-hashObject objtype content repo = getSha subcmd $
- pipeWriteRead (map Param params) content repo
+hashObject objtype content = hashObject' objtype (flip hPutStr content)
+
+hashObject' :: ObjectType -> (Handle -> IO ()) -> Repo -> IO Sha
+hashObject' objtype writer repo = getSha subcmd $
+ pipeWriteRead (map Param params) (Just writer) repo
where
subcmd = "hash-object"
params = [subcmd, "-t", show objtype, "-w", "--stdin", "--no-filters"]
diff --git a/Git/Objects.hs b/Git/Objects.hs
new file mode 100644
index 000000000..b1c580533
--- /dev/null
+++ b/Git/Objects.hs
@@ -0,0 +1,29 @@
+{- .git/objects
+ -
+ - Copyright 2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Git.Objects where
+
+import Common
+import Git
+
+objectsDir :: Repo -> FilePath
+objectsDir r = localGitDir r </> "objects"
+
+packDir :: Repo -> FilePath
+packDir r = objectsDir r </> "pack"
+
+listPackFiles :: Repo -> IO [FilePath]
+listPackFiles r = filter (".pack" `isSuffixOf`)
+ <$> catchDefaultIO [] (dirContents $ packDir r)
+
+packIdxFile :: FilePath -> FilePath
+packIdxFile = flip replaceExtension "idx"
+
+looseObjectFile :: Repo -> Sha -> FilePath
+looseObjectFile r sha = objectsDir r </> prefix </> rest
+ where
+ (prefix, rest) = splitAt 2 (show sha)
diff --git a/Git/RecoverRepository.hs b/Git/RecoverRepository.hs
new file mode 100644
index 000000000..53fbf0ce7
--- /dev/null
+++ b/Git/RecoverRepository.hs
@@ -0,0 +1,171 @@
+{- git repository recovery
+ -
+ - Copyright 2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Git.RecoverRepository (
+ cleanCorruptObjects,
+ retrieveMissingObjects,
+ resetLocalBranches,
+ removeTrackingBranches,
+) where
+
+import Common
+import Git
+import Git.Command
+import Git.Fsck
+import Git.Objects
+import Git.HashObject
+import Git.Types
+import qualified Git.Config
+import qualified Git.Construct
+import Utility.Tmp
+import Utility.Monad
+import Utility.Rsync
+
+import qualified Data.Set as S
+import qualified Data.ByteString.Lazy as L
+import System.Log.Logger
+
+{- Finds and removes corrupt objects from the repository, returning a list
+ - of all such objects, which need to be found elsewhere to finish
+ - recovery.
+ -
+ - Strategy: Run git fsck, remove objects it identifies as corrupt,
+ - and repeat until git fsck finds no new objects.
+ -
+ - To remove corrupt objects, unpack all packs, and remove the packs
+ - (to handle corrupt packs), and remove loose object files.
+ -}
+cleanCorruptObjects :: Repo -> IO (S.Set Sha)
+cleanCorruptObjects r = do
+ notice "Running git fsck ..."
+ check =<< findBroken r
+ where
+ check Nothing = do
+ notice "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file? Unpacking all pack files."
+ explodePacks r
+ retry S.empty
+ check (Just bad)
+ | S.null bad = return S.empty
+ | otherwise = do
+ notice $ unwords
+ [ "git fsck found"
+ , show (S.size bad)
+ , "broken objects. Unpacking all pack files."
+ ]
+ explodePacks r
+ removeLoose r bad
+ retry bad
+ retry oldbad = do
+ notice "Re-running git fsck to see if it finds more problems."
+ v <- findBroken r
+ case v of
+ Nothing -> error $ unwords
+ [ "git fsck found a problem, which was not corrected after removing"
+ , show (S.size oldbad)
+ , "corrupt objects."
+ ]
+ Just newbad -> do
+ removeLoose r newbad
+ let s = S.union oldbad newbad
+ if s == oldbad
+ then return s
+ else retry s
+
+removeLoose :: Repo -> S.Set Sha -> IO ()
+removeLoose r s = do
+ let fs = map (looseObjectFile r) (S.toList s)
+ count <- length <$> filterM doesFileExist fs
+ when (count > 0) $ do
+ notice $ unwords
+ [ "removing"
+ , show count
+ , "corrupt loose objects"
+ ]
+ mapM_ nukeFile fs
+
+explodePacks :: Repo -> IO ()
+explodePacks r = mapM_ go =<< listPackFiles r
+ where
+ go packfile = do
+ -- May fail, if pack file is corrupt.
+ void $ tryIO $
+ pipeWrite [Param "unpack-objects"] r $ \h ->
+ L.hPut h =<< L.readFile packfile
+ nukeFile packfile
+ nukeFile $ packIdxFile packfile
+
+{- Try to retrieve a set of missing objects, from the remotes of a
+ - repository. Returns any that could not be retreived.
+ -}
+retrieveMissingObjects :: S.Set Sha -> Repo -> IO (S.Set Sha)
+retrieveMissingObjects missing r
+ | S.null missing = return missing
+ | otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
+ unlessM (boolSystem "git" [Params "init", File tmpdir]) $
+ error $ "failed to create temp repository in " ++ tmpdir
+ tmpr <- Git.Config.read =<< Git.Construct.fromAbsPath tmpdir
+ stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing
+ if S.null stillmissing
+ then return stillmissing
+ else pullremotes tmpr (remotes r) fetchallrefs stillmissing
+ where
+ pullremotes tmpr [] _ stillmissing = return stillmissing
+ pullremotes tmpr (rmt:rmts) fetchrefs s
+ | S.null s = return s
+ | otherwise = do
+ notice $ "Trying to recover missing objects from remote " ++ repoDescribe rmt
+ ifM (fetchsome rmt fetchrefs tmpr)
+ ( do
+ void $ copyObjects tmpr r
+ stillmissing <- findMissing (S.toList s) r
+ pullremotes tmpr rmts fetchrefs stillmissing
+ , do
+ notice $ unwords
+ [ "failed to fetch from remote"
+ , repoDescribe rmt
+ , "(will continue without it, but making this remote available may improve recovery)"
+ ]
+ pullremotes tmpr rmts fetchrefs s
+ )
+ fetchsome rmt ps = runBool $
+ [ Param "fetch"
+ , Param (repoLocation rmt)
+ , Params "--force --update-head-ok --quiet"
+ ] ++ ps
+ -- fetch refs and tags
+ fetchrefstags = [ Param "+refs/heads/*:refs/heads/*", Param "--tags"]
+ -- Fetch all available refs (more likely to fail,
+ -- as the remote may have refs it refuses to send).
+ fetchallrefs = [ Param "+*:*" ]
+
+{- Copies all objects from the src repository to the dest repository.
+ - This is done using rsync, so it copies all missing object, and all
+ - objects they rely on. -}
+copyObjects :: Repo -> Repo -> IO Bool
+copyObjects srcr destr = rsync
+ [ Param "-qr"
+ , File $ addTrailingPathSeparator $ objectsDir srcr
+ , File $ addTrailingPathSeparator $ objectsDir destr
+ ]
+
+{- To deal with missing objects that cannot be recovered, resets any
+ - local branches to point to an old commit before the missing
+ - objects.
+ -}
+resetLocalBranches :: S.Set Sha -> Repo -> IO [Branch]
+resetLocalBranches missing r = do
+ error "TODO"
+
+{- To deal with missing objects that cannot be recovered, removes
+ - any remote tracking branches that reference them.
+ -}
+removeTrackingBranches :: S.Set Sha -> Repo -> IO [Branch]
+removeTrackingBranches missing r = do
+ error "TODO"
+
+notice :: String -> IO ()
+notice = noticeM "RecoverRepository"
diff --git a/Makefile b/Makefile
index 586661028..0f8fdd849 100644
--- a/Makefile
+++ b/Makefile
@@ -27,8 +27,15 @@ git-annex.1: doc/git-annex.mdwn
git-annex-shell.1: doc/git-annex-shell.mdwn
./Build/mdwn2man git-annex-shell 1 doc/git-annex-shell.mdwn > git-annex-shell.1
+# These are not built normally.
git-union-merge.1: doc/git-union-merge.mdwn
./Build/mdwn2man git-union-merge 1 doc/git-union-merge.mdwn > git-union-merge.1
+git-recover-repository.1: doc/git-recover-repository.mdwn
+ ./Build/mdwn2man git-recover-repository 1 doc/git-recover-repository.mdwn > git-recover-repository.1
+git-union-merge:
+ $(GHC) --make -threaded $@
+git-recover-repository:
+ $(GHC) --make -threaded $@
install-mans: $(mans)
install -d $(DESTDIR)$(PREFIX)/share/man/man1
@@ -74,7 +81,8 @@ clean:
rm -rf tmp dist git-annex $(mans) configure *.tix .hpc \
doc/.ikiwiki html dist tags Build/SysConfig.hs build-stamp \
Setup Build/InstallDesktopFile Build/EvilSplicer \
- Build/Standalone Build/OSXMkLibs
+ Build/Standalone Build/OSXMkLibs \
+ git-union-merge git-recover-repository
find -name \*.o -exec rm {} \;
find -name \*.hi -exec rm {} \;
@@ -213,4 +221,4 @@ hdevtools:
hdevtools --stop-server || true
hdevtools check git-annex.hs -g -cpp -g -i -g -idist/build/git-annex/git-annex-tmp -g -i. -g -idist/build/autogen -g -Idist/build/autogen -g -Idist/build/git-annex/git-annex-tmp -g -IUtility -g -DWITH_TESTSUITE -g -DWITH_S3 -g -DWITH_ASSISTANT -g -DWITH_INOTIFY -g -DWITH_DBUS -g -DWITH_PAIRING -g -DWITH_XMPP -g -optP-include -g -optPdist/build/autogen/cabal_macros.h -g -odir -g dist/build/git-annex/git-annex-tmp -g -hidir -g dist/build/git-annex/git-annex-tmp -g -stubdir -g dist/build/git-annex/git-annex-tmp -g -threaded -g -Wall -g -XHaskell98 -g -XPackageImports
-.PHONY: git-annex tags build-stamp
+.PHONY: git-annex git-union-merge git-recover-repository tags build-stamp
diff --git a/Utility/Process.hs b/Utility/Process.hs
index 8ea632120..53b6d2f2f 100644
--- a/Utility/Process.hs
+++ b/Utility/Process.hs
@@ -72,17 +72,17 @@ readProcessEnv cmd args environ =
, env = environ
}
-{- Writes a string to a process on its stdin,
+{- Runs an action to write to a process on its stdin,
- returns its output, and also allows specifying the environment.
-}
writeReadProcessEnv
:: FilePath
-> [String]
-> Maybe [(String, String)]
- -> String
+ -> (Maybe (Handle -> IO ()))
-> (Maybe (Handle -> IO ()))
-> IO String
-writeReadProcessEnv cmd args environ input adjusthandle = do
+writeReadProcessEnv cmd args environ writestdin adjusthandle = do
(Just inh, Just outh, _, pid) <- createProcess p
maybe (return ()) (\a -> a inh) adjusthandle
@@ -94,7 +94,7 @@ writeReadProcessEnv cmd args environ input adjusthandle = do
_ <- forkIO $ E.evaluate (length output) >> putMVar outMVar ()
-- now write and flush any input
- when (not (null input)) $ do hPutStr inh input; hFlush inh
+ maybe (return ()) (\a -> a inh >> hFlush inh) writestdin
hClose inh -- done with stdin
-- wait on the output
diff --git a/debian/changelog b/debian/changelog
index 232267df5..7fa6fedb7 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -32,6 +32,8 @@ git-annex (4.20131003) UNRELEASED; urgency=low
* Windows: Deal with strange msysgit 1.8.4 behavior of not understanding
DOS formatted paths for --git-dir and --work-tree.
* Removed workaround for bug in git 1.8.4r0.
+ * Added git-recover-repository command to git-annex source
+ (not built by default; this needs to move to someplace else).
-- Joey Hess <joeyh@debian.org> Thu, 03 Oct 2013 15:41:24 -0400
diff --git a/doc/design/assistant/disaster_recovery.mdwn b/doc/design/assistant/disaster_recovery.mdwn
index ccc6a5e6e..2b2ff085e 100644
--- a/doc/design/assistant/disaster_recovery.mdwn
+++ b/doc/design/assistant/disaster_recovery.mdwn
@@ -87,18 +87,23 @@ git-recover-repository command.
### detailed design
-Run `git fsck` and parse output to find bad objects, and determine
-from its output if they are a commit, a tree, or a blob.
-
-Check if there's a remote. If so, and if the bad objects are all
-present on it, can simply get all bad objects from the remote,
-and inject them back into .git/objects to recover:
+Run `git fsck` and parse output to find bad objects. Note that
+fsck may fall over and fail to print out all bad objects, when
+files are corrupt. So if the fsck exits nonzero, need to collect all
+bad objects it did find, and:
1. If the local repository contains packs, the packs may be corrupt.
So, start by using `git unpack-objects` to unpack all
packs it can handle (which may include parts of corrupt packs)
back to loose objects. And delete all packs.
2. Delete all loose corrupt objects.
+
+Repeat until fsck finds no new problems.
+
+Check if there's a remote. If so, and if the bad objects are all
+present on it, can simply get all bad objects from the remote,
+and inject them back into .git/objects to recover:
+
3. Make a new (bare) clone from the remote.
(Note: git does not seem to provide a way to fetch specific missing
objects from the remote. Also, cannot use `--reference` against
diff --git a/doc/git-recover-repository.mdwn b/doc/git-recover-repository.mdwn
new file mode 100644
index 000000000..b05903d14
--- /dev/null
+++ b/doc/git-recover-repository.mdwn
@@ -0,0 +1,28 @@
+# NAME
+
+git-recover-repository - Fix a broken git repository
+
+# SYNOPSIS
+
+git-recover-repository [--force]
+
+# DESCRIPTION
+
+This can fix a corrupt or broken git repository, which git fsck would
+only complain has problems.
+
+It does by deleting all corrupt objects, and retreiving all missing
+objects that it can from the remotes of the repository.
+
+If that is not sufficient to fully recover the repository, it can also
+reset branches back to commits before the corruption happened. It will only
+do this if run with the --force option, since that rewrites history
+and throws out missing data.
+
+# AUTHOR
+
+Joey Hess <joey@kitenet.net>
+
+<http://git-annex.branchable.com/>
+
+Warning: Automatically converted into a man page by mdwn2man. Edit with care
diff --git a/git-recover-repository.hs b/git-recover-repository.hs
new file mode 100644
index 000000000..6654057fa
--- /dev/null
+++ b/git-recover-repository.hs
@@ -0,0 +1,73 @@
+{- git-recover-repository program
+ -
+ - Copyright 2012 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+import System.Environment
+import System.Log.Logger
+import System.Log.Formatter
+import System.Log.Handler (setFormatter)
+import System.Log.Handler.Simple
+import qualified Data.Set as S
+
+import Common
+import qualified Git.CurrentRepo
+import qualified Git.RecoverRepository
+import qualified Git.Config
+
+header :: String
+header = "Usage: git-recover-repository"
+
+usage :: a
+usage = error $ "bad parameters\n\n" ++ header
+
+parseArgs :: IO Bool
+parseArgs = do
+ args <- getArgs
+ return $ or $ map parse args
+ where
+ parse "--force" = True
+ parse _ = usage
+
+enableDebugOutput :: IO ()
+enableDebugOutput = do
+ s <- setFormatter
+ <$> streamHandler stderr NOTICE
+ <*> pure (simpleLogFormatter "$msg")
+ updateGlobalLogger rootLoggerName (setLevel DEBUG . setHandlers [s])
+
+main :: IO ()
+main = do
+ enableDebugOutput
+ forced <- parseArgs
+
+ g <- Git.Config.read =<< Git.CurrentRepo.get
+ missing <- Git.RecoverRepository.cleanCorruptObjects g
+ stillmissing <- Git.RecoverRepository.retrieveMissingObjects missing g
+ if S.null stillmissing
+ then putStr $ unlines
+ [ "Successfully recovered repository!"
+ , "You should run \"git fsck\" to make sure, but it looks like"
+ , "everything was recovered ok."
+ ]
+ else do
+ putStrLn $ unwords
+ [ show (S.size stillmissing)
+ , "missing objects could not be recovered!"
+ ]
+ if forced
+ then do
+ remotebranches <- Git.RecoverRepository.removeTrackingBranches stillmissing g
+ unless (null remotebranches) $
+ putStrLn $ unwords
+ [ "removed"
+ , show (length remotebranches)
+ , "remote tracking branches that referred to missing objects"
+ ]
+ localbranches <- Git.RecoverRepository.resetLocalBranches stillmissing g
+ unless (null localbranches) $ do
+ putStrLn "Reset these local branches to old versions before the missing objects were committed:"
+ putStr $ unlines $ map show localbranches
+ else putStrLn "To force a recovery to a usable state, run this command again with the --force parameter."