summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2016-10-31 14:00:37 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2016-10-31 14:00:37 -0400
commit82d567a73c5d688c3e0de0abb6005d1be1fa2668 (patch)
treee09fde43ba20f45e9a1595c53bff637cc968b293
parent8da030d180fd749103e865626e2f5389729af3f8 (diff)
Assistant, repair: Fix ignoring of git fsck errors due to duplicate file entries in tree objects.
-rw-r--r--CHANGELOG7
-rw-r--r--Git/Fsck.hs69
-rw-r--r--doc/bugs/git-annex_wants_to_repair_because_of_duplicateEntries_in_git_fsck/comment_1_4cd4f4b5c34b3e1bf2989d0cc412ca4f._comment34
3 files changed, 91 insertions, 19 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 94c80eeab..2b6ce1626 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,10 @@
+git-annex (6.20161028) UNRELEASED; urgency=medium
+
+ * Assistant, repair: Fix ignoring of git fsck errors due to
+ duplicate file entries in tree objects.
+
+ -- Joey Hess <id@joeyh.name> Mon, 31 Oct 2016 13:59:47 -0400
+
git-annex (6.20161027) unstable; urgency=medium
* lock, smudge: Fix edge cases where data loss could occur in v6 mode
diff --git a/Git/Fsck.hs b/Git/Fsck.hs
index e50b60715..a716b56e3 100644
--- a/Git/Fsck.hs
+++ b/Git/Fsck.hs
@@ -5,6 +5,8 @@
- Licensed under the GNU GPL version 3 or higher.
-}
+{-# LANGUAGE BangPatterns #-}
+
module Git.Fsck (
FsckResults(..),
MissingObjects,
@@ -25,8 +27,6 @@ import qualified Git.Version
import qualified Data.Set as S
import Control.Concurrent.Async
-type MissingObjects = S.Set Sha
-
data FsckResults
= FsckFoundMissing
{ missingObjects :: MissingObjects
@@ -35,6 +35,25 @@ data FsckResults
| FsckFailed
deriving (Show)
+data FsckOutput
+ = FsckOutput MissingObjects Truncated
+ | NoFsckOutput
+ | AllDuplicateEntriesWarning
+
+type MissingObjects = S.Set Sha
+
+type Truncated = Bool
+
+instance Monoid FsckOutput where
+ mempty = NoFsckOutput
+ mappend (FsckOutput s1 t1) (FsckOutput s2 t2) = FsckOutput (S.union s1 s2) (t1 || t2)
+ mappend (FsckOutput s t) _ = FsckOutput s t
+ mappend _ (FsckOutput s t) = FsckOutput s t
+ mappend NoFsckOutput NoFsckOutput = NoFsckOutput
+ mappend AllDuplicateEntriesWarning AllDuplicateEntriesWarning = AllDuplicateEntriesWarning
+ mappend AllDuplicateEntriesWarning NoFsckOutput = AllDuplicateEntriesWarning
+ mappend NoFsckOutput AllDuplicateEntriesWarning = AllDuplicateEntriesWarning
+
{- Runs fsck to find some of the broken objects in the repository.
- May not find all broken objects, if fsck fails on bad data in some of
- the broken objects it does find.
@@ -58,18 +77,24 @@ findBroken batchmode r = do
{ std_out = CreatePipe
, std_err = CreatePipe
}
- (bad1, bad2) <- concurrently
- (readMissingObjs maxobjs r supportsNoDangling (stdoutHandle p))
- (readMissingObjs maxobjs r supportsNoDangling (stderrHandle p))
+ (o1, o2) <- concurrently
+ (parseFsckOutput maxobjs r supportsNoDangling (stdoutHandle p))
+ (parseFsckOutput maxobjs r supportsNoDangling (stderrHandle p))
fsckok <- checkSuccessProcess pid
- let truncated = S.size bad1 == maxobjs || S.size bad1 == maxobjs
- let badobjs = S.union bad1 bad2
-
- if S.null badobjs && not fsckok
- then return FsckFailed
- else return $ FsckFoundMissing badobjs truncated
+ case mappend o1 o2 of
+ FsckOutput badobjs truncated
+ | S.null badobjs && not fsckok -> return FsckFailed
+ | otherwise -> return $ FsckFoundMissing badobjs truncated
+ NoFsckOutput
+ | not fsckok -> return FsckFailed
+ | otherwise -> return noproblem
+ -- If all fsck output was duplicateEntries warnings,
+ -- the repository is not broken, it just has some unusual
+ -- tree objects in it. So ignore nonzero exit status.
+ AllDuplicateEntriesWarning -> return noproblem
where
maxobjs = 10000
+ noproblem = FsckFoundMissing S.empty False
foundBroken :: FsckResults -> Bool
foundBroken FsckFailed = True
@@ -87,10 +112,18 @@ knownMissing (FsckFoundMissing s _) = s
findMissing :: [Sha] -> Repo -> IO MissingObjects
findMissing objs r = S.fromList <$> filterM (`isMissing` r) objs
-readMissingObjs :: Int -> Repo -> Bool -> Handle -> IO MissingObjects
-readMissingObjs maxobjs r supportsNoDangling h = do
- objs <- take maxobjs . findShas supportsNoDangling <$> hGetContents h
- findMissing objs r
+parseFsckOutput :: Int -> Repo -> Bool -> Handle -> IO FsckOutput
+parseFsckOutput maxobjs r supportsNoDangling h = do
+ ls <- lines <$> hGetContents h
+ if null ls
+ then return NoFsckOutput
+ else if all ("duplicateEntries" `isInfixOf`) ls
+ then return AllDuplicateEntriesWarning
+ else do
+ let shas = findShas supportsNoDangling ls
+ let !truncated = length shas > maxobjs
+ missingobjs <- findMissing (take maxobjs shas) r
+ return $ FsckOutput missingobjs truncated
isMissing :: Sha -> Repo -> IO Bool
isMissing s r = either (const True) (const False) <$> tryIO dump
@@ -100,12 +133,10 @@ isMissing s r = either (const True) (const False) <$> tryIO dump
, Param (fromRef s)
] r
-findShas :: Bool -> String -> [Sha]
-findShas supportsNoDangling = catMaybes . map extractSha . concat . map words . filter wanted . lines
+findShas :: Bool -> [String] -> [Sha]
+findShas supportsNoDangling = catMaybes . map extractSha . concat . map words . filter wanted
where
wanted l
- -- Skip lines like "error in tree <sha>: duplicateEntries: contains duplicate file entries"
- | "duplicateEntries" `isInfixOf` l = False
| supportsNoDangling = True
| otherwise = not ("dangling " `isPrefixOf` l)
diff --git a/doc/bugs/git-annex_wants_to_repair_because_of_duplicateEntries_in_git_fsck/comment_1_4cd4f4b5c34b3e1bf2989d0cc412ca4f._comment b/doc/bugs/git-annex_wants_to_repair_because_of_duplicateEntries_in_git_fsck/comment_1_4cd4f4b5c34b3e1bf2989d0cc412ca4f._comment
new file mode 100644
index 000000000..ddda442e0
--- /dev/null
+++ b/doc/bugs/git-annex_wants_to_repair_because_of_duplicateEntries_in_git_fsck/comment_1_4cd4f4b5c34b3e1bf2989d0cc412ca4f._comment
@@ -0,0 +1,34 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 1"""
+ date="2016-10-31T17:07:43Z"
+ content="""
+This was supposed to be dealt with in version 6.20161027. But, I see now
+that the parser I thought was triggering on those missing object lines,
+was not really (`extractSha` does not match in this case because of the
+colon after the sha).
+
+Instead, the problem seems to be that `git fsck` is exiting nonzero. So it
+assumes that fsck is failing without printing out any shas, which is a
+condition that calls for repairs.
+
+About all I can think to do is, if fsck outputs "duplicateEntries" and
+no other lines at all, and exits nonzero, treat this as a success.
+This risks ignoring other reasons fsck might exit nonzero, but hopefully
+it would output something else in such a case. I've implemented this.
+
+---
+
+I am interested in getting at the root cause of the problem of
+duplicate directory entries. It seems pretty likely to result from using
+adjusted branches.
+
+It would be useful to get more information about the trees that fsck
+is warning about; are they part of existing or past adjusted branches
+or not? Are they merge commits?
+
+(Previously: [[forum/how_to_disaster_recovery]])
+
+Leaving this bug open since we really need to get at the root cause of
+the problem.
+"""]]