summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2016-10-17 12:56:26 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2016-10-17 13:58:43 -0400
commit873f698d40ed95d7875fb4b07cada3f4cbd3724e (patch)
tree4b099f7e86bf471482bb82d90dc6a99cd18a2258
parentac598ce29a0dfe37c730b2d21d92d4c5b93da9ad (diff)
lock: Fix edge cases where data loss could occur in v6 mode.
In the case where the pointer file is in place, and not the content of the object, lock's performNew was called with filemodified=True, which caused it to try to repopulate the object from an unmodified associated file, of which there were none. So, the content of the object got thrown away incorrectly. This was the cause (although not the root cause) of data loss in https://github.com/datalad/datalad/issues/1020 The same problem could also occur when the work tree file is modified, but the object is not, and lock is called with --force. Added a test case for this, since it's excercising the same code path and is easier to set up than the problem above. Note that this only occurred when the keys database did not have an inode cache recorded for the annex object. Normally, the annex object would be in there, but there are of course circumstances where the inode cache is out of sync with reality, since it's only a cache. Fixed by checking if the object is unmodified; if so we don't need to try to repopulate it. This does add an additional checksum to the unlock path, but it's already checksumming the worktree file in another case, so it doesn't slow it down overall. Further investigation found a similar problem occurred when smudge --clean is called on a file and the inode cache is not populated. cleanOldKeys deleted the unmodified old object file in this case. This was also fixed by checking if the object is unmodified. In general, use of getInodeCaches and sameInodeCache is potentially dangerous if the inode cache has not gotten populated for some reason. Better to use isUnmodified. I breifly auited other places that check the inode cache, and did not see any immediate problems, but it would be easy to miss this kind of problem.
-rw-r--r--Annex/Ingest.hs7
-rw-r--r--CHANGELOG7
-rw-r--r--Command/Lock.hs40
-rw-r--r--Test.hs18
4 files changed, 47 insertions, 25 deletions
diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs
index 7b1db8aa7..c120f1a4d 100644
--- a/Annex/Ingest.hs
+++ b/Annex/Ingest.hs
@@ -244,10 +244,9 @@ cleanOldKeys file newkey = do
topf <- inRepo (toTopFilePath file)
oldkeys <- filter (/= newkey)
<$> Database.Keys.getAssociatedKey topf
- forM_ oldkeys $ \key -> do
- obj <- calcRepo (gitAnnexLocation key)
- caches <- Database.Keys.getInodeCaches key
- unlessM (sameInodeCache obj caches) $ do
+ forM_ oldkeys $ \key ->
+ unlessM (isUnmodified key =<< calcRepo (gitAnnexLocation key)) $ do
+ caches <- Database.Keys.getInodeCaches key
unlinkAnnex key
fs <- filter (/= ingestedf)
. map (`fromTopFilePath` g)
diff --git a/CHANGELOG b/CHANGELOG
index 06966c341..7dd7b8d2b 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,10 @@
+git-annex (6.20161013) UNRELEASED; urgency=medium
+
+ * lock, smudge: Fix edge cases where data loss could occur in v6 mode
+ when the keys database was not populated.
+
+ -- Joey Hess <id@joeyh.name> Mon, 17 Oct 2016 12:46:54 -0400
+
git-annex (6.20161012) unstable; urgency=medium
* Optimisations to time it takes git-annex to walk working tree and find
diff --git a/Command/Lock.hs b/Command/Lock.hs
index f9d903622..68360705c 100644
--- a/Command/Lock.hs
+++ b/Command/Lock.hs
@@ -45,27 +45,27 @@ startNew file key = ifM (isJust <$> isAnnexLink file)
)
where
go (Just key')
- | key' == key = cont True
+ | key' == key = cont
| otherwise = errorModified
go Nothing =
ifM (isUnmodified key file)
- ( cont False
+ ( cont
, ifM (Annex.getState Annex.force)
- ( cont True
+ ( cont
, errorModified
)
)
- cont = next . performNew file key
+ cont = next $ performNew file key
-performNew :: FilePath -> Key -> Bool -> CommandPerform
-performNew file key filemodified = do
+performNew :: FilePath -> Key -> CommandPerform
+performNew file key = do
lockdown =<< calcRepo (gitAnnexLocation key)
addLink file key
=<< withTSDelta (liftIO . genInodeCache file)
next $ cleanupNew file key
where
lockdown obj = do
- ifM (catchBoolIO $ sameInodeCache obj =<< Database.Keys.getInodeCaches key)
+ ifM (isUnmodified key obj)
( breakhardlink obj
, repopulate obj
)
@@ -83,20 +83,18 @@ performNew file key filemodified = do
Database.Keys.storeInodeCaches key [obj]
-- Try to repopulate obj from an unmodified associated file.
- repopulate obj
- | filemodified = modifyContent obj $ do
- g <- Annex.gitRepo
- fs <- map (`fromTopFilePath` g)
- <$> Database.Keys.getAssociatedFiles key
- mfile <- firstM (isUnmodified key) fs
- liftIO $ nukeFile obj
- case mfile of
- Just unmodified ->
- unlessM (checkedCopyFile key unmodified obj Nothing)
- lostcontent
- Nothing -> lostcontent
- | otherwise = modifyContent obj $
- liftIO $ renameFile file obj
+ repopulate obj = modifyContent obj $ do
+ g <- Annex.gitRepo
+ fs <- map (`fromTopFilePath` g)
+ <$> Database.Keys.getAssociatedFiles key
+ mfile <- firstM (isUnmodified key) fs
+ liftIO $ nukeFile obj
+ case mfile of
+ Just unmodified ->
+ unlessM (checkedCopyFile key unmodified obj Nothing)
+ lostcontent
+ Nothing -> lostcontent
+
lostcontent = logStatus key InfoMissing
cleanupNew :: FilePath -> Key -> CommandCleanup
diff --git a/Test.hs b/Test.hs
index abd26e4ea..3f6727721 100644
--- a/Test.hs
+++ b/Test.hs
@@ -212,6 +212,7 @@ unitTests note = testGroup ("Unit Tests " ++ note)
, testCase "move" test_move
, testCase "copy" test_copy
, testCase "lock" test_lock
+ , testCase "lock (v6 --force)" test_lock_v6_force
, testCase "edit (no pre-commit)" test_edit
, testCase "edit (pre-commit)" test_edit_precommit
, testCase "partial commit" test_partial_commit
@@ -613,6 +614,23 @@ test_lock = intmpclonerepoInDirect $ do
r' <- git_annex "drop" [annexedfile]
not r' @? "drop wrongly succeeded with no known copy of modified file"
+-- Regression test: lock --force when work tree file
+-- was modified lost the (unmodified) annex object.
+-- (Only occurred when the keys database was out of sync.)
+test_lock_v6_force :: Assertion
+test_lock_v6_force = intmpclonerepoInDirect $ do
+ git_annex "upgrade" [] @? "upgrade failed"
+ whenM (annexeval Annex.Version.versionSupportsUnlockedPointers) $ do
+ git_annex "get" [annexedfile] @? "get of file failed"
+ git_annex "unlock" [annexedfile] @? "unlock failed in v6 mode"
+ annexeval $ do
+ dbdir <- Annex.fromRepo Annex.Locations.gitAnnexKeysDb
+ liftIO $ removeDirectoryRecursive dbdir
+ writeFile annexedfile "test_lock_v6_force content"
+ not <$> git_annex "lock" [annexedfile] @? "lock of modified file failed to fail in v6 mode"
+ git_annex "lock" ["--force", annexedfile] @? "lock --force of modified file failed in v6 mode"
+ annexed_present_locked annexedfile
+
test_edit :: Assertion
test_edit = test_edit' False