diff options
author | Joey Hess <joeyh@joeyh.name> | 2016-10-31 14:00:37 -0400 |
---|---|---|
committer | Joey Hess <joeyh@joeyh.name> | 2016-10-31 14:00:37 -0400 |
commit | 82d567a73c5d688c3e0de0abb6005d1be1fa2668 (patch) | |
tree | e09fde43ba20f45e9a1595c53bff637cc968b293 | |
parent | 8da030d180fd749103e865626e2f5389729af3f8 (diff) |
Assistant, repair: Fix ignoring of git fsck errors due to duplicate file entries in tree objects.
-rw-r--r-- | CHANGELOG | 7 | ||||
-rw-r--r-- | Git/Fsck.hs | 69 | ||||
-rw-r--r-- | doc/bugs/git-annex_wants_to_repair_because_of_duplicateEntries_in_git_fsck/comment_1_4cd4f4b5c34b3e1bf2989d0cc412ca4f._comment | 34 |
3 files changed, 91 insertions, 19 deletions
@@ -1,3 +1,10 @@ +git-annex (6.20161028) UNRELEASED; urgency=medium + + * Assistant, repair: Fix ignoring of git fsck errors due to + duplicate file entries in tree objects. + + -- Joey Hess <id@joeyh.name> Mon, 31 Oct 2016 13:59:47 -0400 + git-annex (6.20161027) unstable; urgency=medium * lock, smudge: Fix edge cases where data loss could occur in v6 mode diff --git a/Git/Fsck.hs b/Git/Fsck.hs index e50b60715..a716b56e3 100644 --- a/Git/Fsck.hs +++ b/Git/Fsck.hs @@ -5,6 +5,8 @@ - Licensed under the GNU GPL version 3 or higher. -} +{-# LANGUAGE BangPatterns #-} + module Git.Fsck ( FsckResults(..), MissingObjects, @@ -25,8 +27,6 @@ import qualified Git.Version import qualified Data.Set as S import Control.Concurrent.Async -type MissingObjects = S.Set Sha - data FsckResults = FsckFoundMissing { missingObjects :: MissingObjects @@ -35,6 +35,25 @@ data FsckResults | FsckFailed deriving (Show) +data FsckOutput + = FsckOutput MissingObjects Truncated + | NoFsckOutput + | AllDuplicateEntriesWarning + +type MissingObjects = S.Set Sha + +type Truncated = Bool + +instance Monoid FsckOutput where + mempty = NoFsckOutput + mappend (FsckOutput s1 t1) (FsckOutput s2 t2) = FsckOutput (S.union s1 s2) (t1 || t2) + mappend (FsckOutput s t) _ = FsckOutput s t + mappend _ (FsckOutput s t) = FsckOutput s t + mappend NoFsckOutput NoFsckOutput = NoFsckOutput + mappend AllDuplicateEntriesWarning AllDuplicateEntriesWarning = AllDuplicateEntriesWarning + mappend AllDuplicateEntriesWarning NoFsckOutput = AllDuplicateEntriesWarning + mappend NoFsckOutput AllDuplicateEntriesWarning = AllDuplicateEntriesWarning + {- Runs fsck to find some of the broken objects in the repository. - May not find all broken objects, if fsck fails on bad data in some of - the broken objects it does find. @@ -58,18 +77,24 @@ findBroken batchmode r = do { std_out = CreatePipe , std_err = CreatePipe } - (bad1, bad2) <- concurrently - (readMissingObjs maxobjs r supportsNoDangling (stdoutHandle p)) - (readMissingObjs maxobjs r supportsNoDangling (stderrHandle p)) + (o1, o2) <- concurrently + (parseFsckOutput maxobjs r supportsNoDangling (stdoutHandle p)) + (parseFsckOutput maxobjs r supportsNoDangling (stderrHandle p)) fsckok <- checkSuccessProcess pid - let truncated = S.size bad1 == maxobjs || S.size bad1 == maxobjs - let badobjs = S.union bad1 bad2 - - if S.null badobjs && not fsckok - then return FsckFailed - else return $ FsckFoundMissing badobjs truncated + case mappend o1 o2 of + FsckOutput badobjs truncated + | S.null badobjs && not fsckok -> return FsckFailed + | otherwise -> return $ FsckFoundMissing badobjs truncated + NoFsckOutput + | not fsckok -> return FsckFailed + | otherwise -> return noproblem + -- If all fsck output was duplicateEntries warnings, + -- the repository is not broken, it just has some unusual + -- tree objects in it. So ignore nonzero exit status. + AllDuplicateEntriesWarning -> return noproblem where maxobjs = 10000 + noproblem = FsckFoundMissing S.empty False foundBroken :: FsckResults -> Bool foundBroken FsckFailed = True @@ -87,10 +112,18 @@ knownMissing (FsckFoundMissing s _) = s findMissing :: [Sha] -> Repo -> IO MissingObjects findMissing objs r = S.fromList <$> filterM (`isMissing` r) objs -readMissingObjs :: Int -> Repo -> Bool -> Handle -> IO MissingObjects -readMissingObjs maxobjs r supportsNoDangling h = do - objs <- take maxobjs . findShas supportsNoDangling <$> hGetContents h - findMissing objs r +parseFsckOutput :: Int -> Repo -> Bool -> Handle -> IO FsckOutput +parseFsckOutput maxobjs r supportsNoDangling h = do + ls <- lines <$> hGetContents h + if null ls + then return NoFsckOutput + else if all ("duplicateEntries" `isInfixOf`) ls + then return AllDuplicateEntriesWarning + else do + let shas = findShas supportsNoDangling ls + let !truncated = length shas > maxobjs + missingobjs <- findMissing (take maxobjs shas) r + return $ FsckOutput missingobjs truncated isMissing :: Sha -> Repo -> IO Bool isMissing s r = either (const True) (const False) <$> tryIO dump @@ -100,12 +133,10 @@ isMissing s r = either (const True) (const False) <$> tryIO dump , Param (fromRef s) ] r -findShas :: Bool -> String -> [Sha] -findShas supportsNoDangling = catMaybes . map extractSha . concat . map words . filter wanted . lines +findShas :: Bool -> [String] -> [Sha] +findShas supportsNoDangling = catMaybes . map extractSha . concat . map words . filter wanted where wanted l - -- Skip lines like "error in tree <sha>: duplicateEntries: contains duplicate file entries" - | "duplicateEntries" `isInfixOf` l = False | supportsNoDangling = True | otherwise = not ("dangling " `isPrefixOf` l) diff --git a/doc/bugs/git-annex_wants_to_repair_because_of_duplicateEntries_in_git_fsck/comment_1_4cd4f4b5c34b3e1bf2989d0cc412ca4f._comment b/doc/bugs/git-annex_wants_to_repair_because_of_duplicateEntries_in_git_fsck/comment_1_4cd4f4b5c34b3e1bf2989d0cc412ca4f._comment new file mode 100644 index 000000000..ddda442e0 --- /dev/null +++ b/doc/bugs/git-annex_wants_to_repair_because_of_duplicateEntries_in_git_fsck/comment_1_4cd4f4b5c34b3e1bf2989d0cc412ca4f._comment @@ -0,0 +1,34 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2016-10-31T17:07:43Z" + content=""" +This was supposed to be dealt with in version 6.20161027. But, I see now +that the parser I thought was triggering on those missing object lines, +was not really (`extractSha` does not match in this case because of the +colon after the sha). + +Instead, the problem seems to be that `git fsck` is exiting nonzero. So it +assumes that fsck is failing without printing out any shas, which is a +condition that calls for repairs. + +About all I can think to do is, if fsck outputs "duplicateEntries" and +no other lines at all, and exits nonzero, treat this as a success. +This risks ignoring other reasons fsck might exit nonzero, but hopefully +it would output something else in such a case. I've implemented this. + +--- + +I am interested in getting at the root cause of the problem of +duplicate directory entries. It seems pretty likely to result from using +adjusted branches. + +It would be useful to get more information about the trees that fsck +is warning about; are they part of existing or past adjusted branches +or not? Are they merge commits? + +(Previously: [[forum/how_to_disaster_recovery]]) + +Leaving this bug open since we really need to get at the root cause of +the problem. +"""]] |