1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
|
{- git repository recovery
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Git.RecoverRepository (
cleanCorruptObjects,
retrieveMissingObjects,
resetLocalBranches,
removeTrackingBranches,
) where
import Common
import Git
import Git.Command
import Git.Fsck
import Git.Objects
import Git.HashObject
import Git.Types
import qualified Git.Config
import qualified Git.Construct
import Utility.Tmp
import Utility.Monad
import Utility.Rsync
import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L
import System.Log.Logger
{- Finds and removes corrupt objects from the repository, returning a list
- of all such objects, which need to be found elsewhere to finish
- recovery.
-
- Strategy: Run git fsck, remove objects it identifies as corrupt,
- and repeat until git fsck finds no new objects.
-
- To remove corrupt objects, unpack all packs, and remove the packs
- (to handle corrupt packs), and remove loose object files.
-}
cleanCorruptObjects :: Repo -> IO (S.Set Sha)
cleanCorruptObjects r = do
notice "Running git fsck ..."
check =<< findBroken r
where
check Nothing = do
notice "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file? Unpacking all pack files."
explodePacks r
retry S.empty
check (Just bad)
| S.null bad = return S.empty
| otherwise = do
notice $ unwords
[ "git fsck found"
, show (S.size bad)
, "broken objects. Unpacking all pack files."
]
explodePacks r
removeLoose r bad
retry bad
retry oldbad = do
notice "Re-running git fsck to see if it finds more problems."
v <- findBroken r
case v of
Nothing -> error $ unwords
[ "git fsck found a problem, which was not corrected after removing"
, show (S.size oldbad)
, "corrupt objects."
]
Just newbad -> do
removeLoose r newbad
let s = S.union oldbad newbad
if s == oldbad
then return s
else retry s
removeLoose :: Repo -> S.Set Sha -> IO ()
removeLoose r s = do
let fs = map (looseObjectFile r) (S.toList s)
count <- length <$> filterM doesFileExist fs
when (count > 0) $ do
notice $ unwords
[ "removing"
, show count
, "corrupt loose objects"
]
mapM_ nukeFile fs
explodePacks :: Repo -> IO ()
explodePacks r = mapM_ go =<< listPackFiles r
where
go packfile = do
-- May fail, if pack file is corrupt.
void $ tryIO $
pipeWrite [Param "unpack-objects"] r $ \h ->
L.hPut h =<< L.readFile packfile
nukeFile packfile
nukeFile $ packIdxFile packfile
{- Try to retrieve a set of missing objects, from the remotes of a
- repository. Returns any that could not be retreived.
-}
retrieveMissingObjects :: S.Set Sha -> Repo -> IO (S.Set Sha)
retrieveMissingObjects missing r
| S.null missing = return missing
| otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
unlessM (boolSystem "git" [Params "init", File tmpdir]) $
error $ "failed to create temp repository in " ++ tmpdir
tmpr <- Git.Config.read =<< Git.Construct.fromAbsPath tmpdir
stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing
if S.null stillmissing
then return stillmissing
else pullremotes tmpr (remotes r) fetchallrefs stillmissing
where
pullremotes tmpr [] _ stillmissing = return stillmissing
pullremotes tmpr (rmt:rmts) fetchrefs s
| S.null s = return s
| otherwise = do
notice $ "Trying to recover missing objects from remote " ++ repoDescribe rmt
ifM (fetchsome rmt fetchrefs tmpr)
( do
void $ copyObjects tmpr r
stillmissing <- findMissing (S.toList s) r
pullremotes tmpr rmts fetchrefs stillmissing
, do
notice $ unwords
[ "failed to fetch from remote"
, repoDescribe rmt
, "(will continue without it, but making this remote available may improve recovery)"
]
pullremotes tmpr rmts fetchrefs s
)
fetchsome rmt ps = runBool $
[ Param "fetch"
, Param (repoLocation rmt)
, Params "--force --update-head-ok --quiet"
] ++ ps
-- fetch refs and tags
fetchrefstags = [ Param "+refs/heads/*:refs/heads/*", Param "--tags"]
-- Fetch all available refs (more likely to fail,
-- as the remote may have refs it refuses to send).
fetchallrefs = [ Param "+*:*" ]
{- Copies all objects from the src repository to the dest repository.
- This is done using rsync, so it copies all missing object, and all
- objects they rely on. -}
copyObjects :: Repo -> Repo -> IO Bool
copyObjects srcr destr = rsync
[ Param "-qr"
, File $ addTrailingPathSeparator $ objectsDir srcr
, File $ addTrailingPathSeparator $ objectsDir destr
]
{- To deal with missing objects that cannot be recovered, resets any
- local branches to point to an old commit before the missing
- objects.
-}
resetLocalBranches :: S.Set Sha -> Repo -> IO [Branch]
resetLocalBranches missing r = do
error "TODO"
{- To deal with missing objects that cannot be recovered, removes
- any remote tracking branches that reference them.
-}
removeTrackingBranches :: S.Set Sha -> Repo -> IO [Branch]
removeTrackingBranches missing r = do
error "TODO"
notice :: String -> IO ()
notice = noticeM "RecoverRepository"
|