summaryrefslogtreecommitdiff
path: root/Git/RecoverRepository.hs
blob: 53fbf0ce7b620a1ec527898939d7db46f4c8c50f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
{- git repository recovery
 -
 - Copyright 2013 Joey Hess <joey@kitenet.net>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

module Git.RecoverRepository (
	cleanCorruptObjects,
	retrieveMissingObjects,
	resetLocalBranches,
	removeTrackingBranches,
) where

import Common
import Git
import Git.Command
import Git.Fsck
import Git.Objects
import Git.HashObject
import Git.Types
import qualified Git.Config
import qualified Git.Construct
import Utility.Tmp
import Utility.Monad
import Utility.Rsync

import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L
import System.Log.Logger

{- Finds and removes corrupt objects from the repository, returning a list
 - of all such objects, which need to be found elsewhere to finish
 - recovery.
 -
 - Strategy: Run git fsck, remove objects it identifies as corrupt,
 - and repeat until git fsck finds no new objects.
 -
 - To remove corrupt objects, unpack all packs, and remove the packs
 - (to handle corrupt packs), and remove loose object files.
 -}
cleanCorruptObjects :: Repo -> IO (S.Set Sha)
cleanCorruptObjects r = do
	notice "Running git fsck ..."
	check =<< findBroken r
  where
  	check Nothing = do
		notice "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file? Unpacking all pack files."
		explodePacks r
		retry S.empty
	check (Just bad)
		| S.null bad = return S.empty
		| otherwise = do
			notice $ unwords 
				[ "git fsck found"
				, show (S.size bad)
				, "broken objects. Unpacking all pack files."
				]
			explodePacks r
			removeLoose r bad
			retry bad
	retry oldbad = do
		notice "Re-running git fsck to see if it finds more problems."
		v <- findBroken r
		case v of
			Nothing -> error $ unwords
				[ "git fsck found a problem, which was not corrected after removing"
				, show (S.size oldbad)
				, "corrupt objects."
				]
			Just newbad -> do
				removeLoose r newbad
				let s = S.union oldbad newbad
				if s == oldbad
					then return s
					else retry s

removeLoose :: Repo -> S.Set Sha -> IO ()
removeLoose r s = do
	let fs = map (looseObjectFile r) (S.toList s)
	count <- length <$> filterM doesFileExist fs
	when (count > 0) $ do
		notice $ unwords
			[ "removing"
			, show count
			, "corrupt loose objects"
			]
		mapM_ nukeFile fs

explodePacks :: Repo -> IO ()
explodePacks r = mapM_ go =<< listPackFiles r
  where
	go packfile = do
		-- May fail, if pack file is corrupt.
		void $ tryIO $
			pipeWrite [Param "unpack-objects"] r $ \h ->
				L.hPut h =<< L.readFile packfile
		nukeFile packfile
		nukeFile $ packIdxFile packfile

{- Try to retrieve a set of missing objects, from the remotes of a
 - repository. Returns any that could not be retreived.
 -}
retrieveMissingObjects :: S.Set Sha -> Repo -> IO (S.Set Sha)
retrieveMissingObjects missing r
	| S.null missing = return missing
	| otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
		unlessM (boolSystem "git" [Params "init", File tmpdir]) $
			error $ "failed to create temp repository in " ++ tmpdir
		tmpr <- Git.Config.read =<< Git.Construct.fromAbsPath tmpdir
		stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing
		if S.null stillmissing
			then return stillmissing
			else pullremotes tmpr (remotes r) fetchallrefs stillmissing
  where
	pullremotes tmpr [] _ stillmissing = return stillmissing
	pullremotes tmpr (rmt:rmts) fetchrefs s
		| S.null s = return s
		| otherwise = do
			notice $ "Trying to recover missing objects from remote " ++ repoDescribe rmt
			ifM (fetchsome rmt fetchrefs tmpr)
				( do
					void $ copyObjects tmpr r
					stillmissing <- findMissing (S.toList s) r
					pullremotes tmpr rmts fetchrefs stillmissing
				, do
					notice $ unwords
						[ "failed to fetch from remote"
						, repoDescribe rmt
						, "(will continue without it, but making this remote available may improve recovery)"
						]
					pullremotes tmpr rmts fetchrefs s
				)
	fetchsome rmt ps = runBool $
		[ Param "fetch"
		, Param (repoLocation rmt)
		, Params "--force --update-head-ok --quiet"
		] ++ ps
	-- fetch refs and tags
	fetchrefstags = [ Param "+refs/heads/*:refs/heads/*", Param "--tags"]
	-- Fetch all available refs (more likely to fail,
	-- as the remote may have refs it refuses to send).
	fetchallrefs = [ Param "+*:*" ]

{- Copies all objects from the src repository to the dest repository.
 - This is done using rsync, so it copies all missing object, and all
 - objects they rely on. -}
copyObjects :: Repo -> Repo -> IO Bool
copyObjects srcr destr = rsync
	[ Param "-qr"
	, File $ addTrailingPathSeparator $ objectsDir srcr
	, File $ addTrailingPathSeparator $ objectsDir destr
	]

{- To deal with missing objects that cannot be recovered, resets any
 - local branches to point to an old commit before the missing
 - objects.
 -}
resetLocalBranches :: S.Set Sha -> Repo -> IO [Branch]
resetLocalBranches missing r = do
	error "TODO"

{- To deal with missing objects that cannot be recovered, removes
 - any remote tracking branches that reference them.
 -}
removeTrackingBranches :: S.Set Sha -> Repo -> IO [Branch]
removeTrackingBranches missing r = do
	error "TODO"

notice :: String -> IO ()
notice = noticeM "RecoverRepository"