aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-04-28 20:41:40 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-04-28 20:41:40 -0400
commiteef3f634e9f92e7af486e5ee4afdac9a79b034cf (patch)
treee7cdc0a9a891aef18f02b65877c076089c6c3d3c
parentb8e114bce153875db8afd20182eae03e35662737 (diff)
Avoid crashing when an existing key is readded to the annex.
-rw-r--r--Content.hs37
-rw-r--r--debian/changelog1
-rw-r--r--doc/bugs/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__.mdwn46
-rw-r--r--doc/forum/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__.mdwn39
4 files changed, 78 insertions, 45 deletions
diff --git a/Content.hs b/Content.hs
index 99770f553..ade936da3 100644
--- a/Content.hs
+++ b/Content.hs
@@ -182,18 +182,41 @@ allowWrite f = do
s <- getFileStatus f
setFileMode f $ fileMode s `unionFileModes` ownerWriteMode
-{- Moves a file into .git/annex/objects/ -}
+{- Moves a file into .git/annex/objects/
+ -
+ - What if the key there already has content? This could happen for
+ - various reasons; perhaps the same content is being annexed again.
+ - Perhaps there has been a hash collision generating the keys.
+ -
+ - The current strategy is to assume that in this case it's safe to delete
+ - one of the two copies of the content; and the one already in the annex
+ - is left there, assuming it's the original, canonical copy.
+ -
+ - I considered being more paranoid, and checking that both files had
+ - the same content. Decided against it because A) users explicitly choose
+ - a backend based on its hashing properties and so if they're dealing
+ - with colliding files it's their own fault and B) adding such a check
+ - would not catch all cases of colliding keys. For example, perhaps
+ - a remote has a key; if it's then added again with different content then
+ - the overall system now has two different peices of content for that
+ - key, and one of them will probably get deleted later. So, adding the
+ - check here would only raise expectations that git-annex cannot truely
+ - meet.
+ -}
moveAnnex :: Key -> FilePath -> Annex ()
moveAnnex key src = do
g <- Annex.gitRepo
let dest = gitAnnexLocation g key
let dir = parentDir dest
- liftIO $ do
- createDirectoryIfMissing True dir
- allowWrite dir -- in case the directory already exists
- renameFile src dest
- preventWrite dest
- preventWrite dir
+ e <- liftIO $ doesFileExist dest
+ if e
+ then liftIO $ removeFile src
+ else liftIO $ do
+ createDirectoryIfMissing True dir
+ allowWrite dir -- in case the directory already exists
+ renameFile src dest
+ preventWrite dest
+ preventWrite dir
{- Removes a key's file from .git/annex/objects/ -}
removeAnnex :: Key -> Annex ()
diff --git a/debian/changelog b/debian/changelog
index b7f33a704..92c05a5a6 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -2,6 +2,7 @@ git-annex (0.20110428) UNRELEASED; urgency=low
* Fix hasKeyCheap setting for bup and rsync special remotes.
* Add hook special remotes.
+ * Avoid crashing when an existing key is readded to the annex.
-- Joey Hess <joeyh@debian.org> Thu, 28 Apr 2011 14:38:16 -0400
diff --git a/doc/bugs/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__.mdwn b/doc/bugs/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__.mdwn
new file mode 100644
index 000000000..e94312774
--- /dev/null
+++ b/doc/bugs/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__.mdwn
@@ -0,0 +1,46 @@
+I'm importing a directory where some files are hard links of each other.
+
+This is confusing git-annex. Here's a small test of that:
+
+<pre>
+paulproteus@pathi:/tmp$ mkdir annex-test
+paulproteus@pathi:/tmp$ cd annex-test
+paulproteus@pathi:/tmp/annex-test$ git init
+Initialized empty Git repository in /tmp/annex-test/.git/
+paulproteus@pathi:/tmp/annex-test$ git annex init testing
+init testing ok
+paulproteus@pathi:/tmp/annex-test$ echo '* annex.backend=SHA1' >> .gitattributes
+paulproteus@pathi:/tmp/annex-test$ git commit .gitattributes -m 'Default to sha1'
+[master dd54b41] Default to sha1
+ 1 files changed, 1 insertions(+), 0 deletions(-)
+paulproteus@pathi:/tmp/annex-test$ echo "Look at me" > file1
+paulproteus@pathi:/tmp/annex-test$ cp -l file1 file2
+paulproteus@pathi:/tmp/annex-test$ git annex add file1
+add file1 (checksum...) ok
+(Recording state in git...)
+paulproteus@pathi:/tmp/annex-test$ git commit -m 'So far, so good'
+[master eb43084] So far, so good
+ 2 files changed, 2 insertions(+), 0 deletions(-)
+ create mode 100644 .git-annex/9a3/f1f/SHA1-s11--b9c599d64212934582d676c722cf3ec61f60e09c.log
+ create mode 120000 file1
+paulproteus@pathi:/tmp/annex-test$ git annex add file2
+add file2 (checksum...)
+ git-annex: .git/annex/objects/PM/7p/SHA1-s11--b9c599d64212934582d676c722cf3ec61f60e09c/SHA1-s11--b9c599d64212934582d676c722cf3ec61f60e09c: createSymbolicLink: already exists (File exists)
+git-annex: 1 failed
+paulproteus@pathi:/tmp/annex-test$
+</pre>
+
+When trying to make a small test case for this bug, I noticed that if file1 and file2 have the same contents but are not hard links of each other, they both get annexed just fine.
+
+I think the right behavior here is to annex file2 just fine, as if they weren't hard links before.
+
+
+-- Asheesh.
+
+> The same thing happens anytime the key for a file collides with a key
+> already in the annex, AFAICS. Including when the files have the same
+> content but are not hard links.
+>
+> I've fixed this bug. The first file in wins. See commit for some
+> interesting discussion about why it should not check for hash collisions
+> in this situation. [[done]] --[[Joey]]
diff --git a/doc/forum/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__.mdwn b/doc/forum/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__.mdwn
index 6bcd2fc4c..38865f49a 100644
--- a/doc/forum/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__.mdwn
+++ b/doc/forum/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__.mdwn
@@ -1,38 +1 @@
-I'm importing a directory where some files are hard links of each other.
-
-This is confusing git-annex. Here's a small test of that:
-
-<pre>
-paulproteus@pathi:/tmp$ mkdir annex-test
-paulproteus@pathi:/tmp$ cd annex-test
-paulproteus@pathi:/tmp/annex-test$ git init
-Initialized empty Git repository in /tmp/annex-test/.git/
-paulproteus@pathi:/tmp/annex-test$ git annex init testing
-init testing ok
-paulproteus@pathi:/tmp/annex-test$ echo '* annex.backend=SHA1' >> .gitattributes
-paulproteus@pathi:/tmp/annex-test$ git commit .gitattributes -m 'Default to sha1'
-[master dd54b41] Default to sha1
- 1 files changed, 1 insertions(+), 0 deletions(-)
-paulproteus@pathi:/tmp/annex-test$ echo "Look at me" > file1
-paulproteus@pathi:/tmp/annex-test$ cp -l file1 file2
-paulproteus@pathi:/tmp/annex-test$ git annex add file1
-add file1 (checksum...) ok
-(Recording state in git...)
-paulproteus@pathi:/tmp/annex-test$ git commit -m 'So far, so good'
-[master eb43084] So far, so good
- 2 files changed, 2 insertions(+), 0 deletions(-)
- create mode 100644 .git-annex/9a3/f1f/SHA1-s11--b9c599d64212934582d676c722cf3ec61f60e09c.log
- create mode 120000 file1
-paulproteus@pathi:/tmp/annex-test$ git annex add file2
-add file2 (checksum...)
- git-annex: .git/annex/objects/PM/7p/SHA1-s11--b9c599d64212934582d676c722cf3ec61f60e09c/SHA1-s11--b9c599d64212934582d676c722cf3ec61f60e09c: createSymbolicLink: already exists (File exists)
-git-annex: 1 failed
-paulproteus@pathi:/tmp/annex-test$
-</pre>
-
-When trying to make a small test case for this bug, I noticed that if file1 and file2 have the same contents but are not hard links of each other, they both get annexed just fine.
-
-I think the right behavior here is to annex file2 just fine, as if they weren't hard links before.
-
-
--- Asheesh.
+Moved to [[bugs|bugs/Error_while_adding_a_file___34__createSymbolicLink:_already_exists__34__]] --[[Joey]]