summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2017-10-16 12:57:28 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2017-10-16 12:57:28 -0400
commitfb899e75cf1ac84f3fd61ea39288811bacddee2c (patch)
tree4a988fc245f8bae27bbe7deac859378a4863f7dd
parentb3da6b94a2cecef01b48a21d37f01df4b94b01e8 (diff)
add: Replace work tree file atomically.
Before, there was a window where interrupting an add could result in the file being moved into the annex, with no symlink yet created. This commit was supported by the NSF-funded DataLad project.
-rw-r--r--Annex/Ingest.hs4
-rw-r--r--CHANGELOG3
-rw-r--r--doc/bugs/add_-J_fails_with_not_found.mdwn39
-rw-r--r--doc/bugs/add_-J_fails_with_not_found/comment_1_daf1f53ac77f664ca0ce572c1bdc45ee._comment31
-rw-r--r--doc/bugs/add_-J_fails_with_not_found/comment_2_441ded7e239f26ab3a27a2c43a6d0856._comment44
-rw-r--r--doc/bugs/add_-J_fails_with_not_found/comment_3_9749c2722b5520e13448128b68fcfaed._comment20
-rw-r--r--doc/bugs/get_-J___34__fails__34___to_get_files_with_the_same_key/comment_5_525ac8695b6253fc435112f336238c41._comment8
7 files changed, 146 insertions, 3 deletions
diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs
index 4dabb1b58..5fdcd042c 100644
--- a/Annex/Ingest.hs
+++ b/Annex/Ingest.hs
@@ -128,9 +128,7 @@ ingestAdd' ld@(Just (LockedDown cfg source)) mk = do
Just k -> do
let f = keyFilename source
if lockingFile cfg
- then do
- liftIO $ nukeFile f
- addLink f k mic
+ then addLink f k mic
else ifM isDirect
( do
l <- calcRepo $ gitAnnexLink f k
diff --git a/CHANGELOG b/CHANGELOG
index 4fac7a51a..75fecedf3 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -7,6 +7,9 @@ git-annex (6.20171004) UNRELEASED; urgency=medium
is created by initremote.
(This seems to work around some brokenness of the box.com webdav
server which was entering a redirect loop.)
+ * add: Replace work tree file atomically. Before, there was a window
+ where interrupting an add could result in the file being
+ moved into the annex, with no symlink yet created.
-- Joey Hess <id@joeyh.name> Sat, 07 Oct 2017 14:11:00 -0400
diff --git a/doc/bugs/add_-J_fails_with_not_found.mdwn b/doc/bugs/add_-J_fails_with_not_found.mdwn
new file mode 100644
index 000000000..dc0f44f0f
--- /dev/null
+++ b/doc/bugs/add_-J_fails_with_not_found.mdwn
@@ -0,0 +1,39 @@
+here, caught one for you for add (git annex version is tiny bit dated: 6.20170815+gitg22da64d0f-1~ndall+1 )
+
+(Pdb) print e.msg
+Failed to run ['git', '-c', 'receive.autogc=0', '-c', 'gc.auto=0', 'annex', 'add', '--json', '-J6', '082-sn/000001.dcm', '078-sn/000001.dcm', '080-sn/000002.dcm', '076-sn/000002.dcm', '080-sn/000004.dcm', '080-sn/000001.dcm', '079-sn/000003.dcm', '082-sn/000003.dcm', '073-sn/000002.dcm', '079-sn/000002.dcm', '079-sn/000001.dcm', '077-sn/000002.dcm', '074-sn/000001.dcm', '080-sn/000003.dcm', '077-sn/000001.dcm', '076-sn/000001.dcm', '081-sn/000002.dcm', '078-sn/000002.dcm', '081-sn/000001.dcm', '081-sn/000003.dcm', '073-sn/000001.dcm', '075-sn/000001.dcm', '079-sn/000004.dcm', '082-sn/000002.dcm', '075-sn/000002.dcm', '074-sn/000002.dcm'] under '/mnt/DICOM/test2/inbox/2016/12/12/unknown'. Exit code=1. out={"command":"add","success":true,"key":"MD5E-s193740--3da2e91e0888c05e01daf8ef9ae79570.dcm","file":"076-sn/000002.dcm"}
+{"command":"add","success":true,"key":"MD5E-s205064--851ce819ea44cabd66923d902e55cd2c.dcm","file":"082-sn/000001.dcm"}
+{"command":"add","success":true,"key":"MD5E-s226874--8acaba7ff0f57a3e69a5c1afb8bc0ba3.dcm","file":"080-sn/000001.dcm"}
+{"command":"add","success":true,"key":"MD5E-s193746--faf49ea7b403c8f6191fa5b7521ebbd5.dcm","file":"078-sn/000001.dcm"}
+{"command":"add","success":true,"key":"MD5E-s205066--c5af824ecd895a342cba7ffefe019fde.dcm","file":"082-sn/000003.dcm"}
+{"command":"add","success":true,"key":"MD5E-s226872--abdaf5312c4c11da68cc9aaead6bc93a.dcm","file":"080-sn/000004.dcm"}
+{"command":"add","success":true,"key":"MD5E-s226768--352d7bf9be4b45316cb75c2fc5bfbdd2.dcm","file":"079-sn/000002.dcm"}
+{"command":"add","success":true,"key":"MD5E-s226872--755e5e693b07fd7cab07fa55e0f17fd2.dcm","file":"080-sn/000002.dcm"}
+{"command":"add","success":true,"key":"MD5E-s193636--47816f992ddf6167549b0d20bf430036.dcm","file":"073-sn/000002.dcm"}
+{"command":"add","success":true,"key":"MD5E-s193750--465de1ebab376633842ac9e36f6fcc35.dcm","file":"074-sn/000001.dcm"}
+{"command":"add","success":true,"key":"MD5E-s226766--04006df494da6adbe7b3eea90e4dba1b.dcm","file":"079-sn/000003.dcm"}
+{"command":"add","success":true,"key":"MD5E-s226768--3624982179994bbaa781ee8c72c5d6b9.dcm","file":"079-sn/000001.dcm"}
+{"command":"add","success":true,"key":"MD5E-s193636--98a6efe8bea17485297e5d600c4c01e6.dcm","file":"077-sn/000002.dcm"}
+{"command":"add","success":true,"key":"MD5E-s193642--15770ed37bedb6bd90a7a4b39761a1fa.dcm","file":"077-sn/000001.dcm"}
+{"command":"add","success":true,"key":"MD5E-s226870--621a7d3a2be10b99d45a39ef2526ab1c.dcm","file":"080-sn/000003.dcm"}
+{"command":"add","success":true,"key":"MD5E-s204960--e28aac5eb5e52fe47c2671e8b87edc8c.dcm","file":"081-sn/000002.dcm"}
+{"command":"add","success":true,"key":"MD5E-s193750--25cadf2f0fc7a4ca324687a742a2d55e.dcm","file":"076-sn/000001.dcm"}
+{"command":"add","success":true,"key":"MD5E-s193740--0424fce22306f7590f6d94561eb74e93.dcm","file":"078-sn/000002.dcm"}
+{"command":"add","success":true,"key":"MD5E-s204956--039309e288379d98480390043ea84ecb.dcm","file":"081-sn/000001.dcm"}
+{"command":"add","success":true,"key":"MD5E-s204960--7b32e2620301909a74adea3d29c00ed5.dcm","file":"081-sn/000003.dcm"}
+{"command":"add","success":true,"key":"MD5E-s193646--ff7337b5330f852063652f1e75099b27.dcm","file":"073-sn/000001.dcm"}
+{"command":"add","success":true,"key":"MD5E-s226768--40821b65d32878f5859ed1b19c5633d0.dcm","file":"079-sn/000004.dcm"}
+{"command":"add","success":true,"key":"MD5E-s193646--02704fbd9d37349c4ee96c1509f1a20d.dcm","file":"075-sn/000001.dcm"}
+{"command":"add","success":true,"key":"MD5E-s193636--7c727d28c56da2476b744e8aca421b5c.dcm","file":"075-sn/000002.dcm"}
+{"command":"add","success":true,"key":"MD5E-s193740--c0b3731d6a46df9000df32eeb78ae894.dcm","file":"074-sn/000002.dcm"}
+{"command":"add","success":true,"key":"MD5E-s205066--11b6f6e4cb2f5879a530f83d288bb7fa.dcm","file":"082-sn/000002.dcm"}
+ err=git-annex: 075-sn/000001.dcm not found
+git-annex: add: 1 failed
+
+(Pdb)
+[1]+ Stopped ../test/addall.sh
+(dev) [yoh@rolando test2]$ ls -l '/mnt/DICOM/test2/inbox/2016/12/12/unknown/075-sn/000001.dcm'
+lrwxrwxrwx 1 yoh users 129 Dec 12 2016 /mnt/DICOM/test2/inbox/2016/12/12/unknown/075-sn/000001.dcm -> ../.git/annex/objects/ZQ/3G/MD5E-s193646--02704fbd9d37349c4ee96c1509f1a20d.dcm/MD5E-s193646--02704fbd9d37349c4ee96c1509f1a20d.dcm
+
+so it does report success in json, but complains in stderr that file is not found... I guess some race condition between workers so it manages to catch the moment when file is moved into a key or smth like that?
+
diff --git a/doc/bugs/add_-J_fails_with_not_found/comment_1_daf1f53ac77f664ca0ce572c1bdc45ee._comment b/doc/bugs/add_-J_fails_with_not_found/comment_1_daf1f53ac77f664ca0ce572c1bdc45ee._comment
new file mode 100644
index 000000000..078d14b6a
--- /dev/null
+++ b/doc/bugs/add_-J_fails_with_not_found/comment_1_daf1f53ac77f664ca0ce572c1bdc45ee._comment
@@ -0,0 +1,31 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 1"""
+ date="2017-10-16T16:08:18Z"
+ content="""
+The "<file> not found" error message comes from
+CmdLine.Seek.checkFileOrDirectoryExists, which is intended to catch
+git-annex being run with a parameter that does not exist on disk and let
+the user know about their mistake.
+
+Seems like that that's being called from withFilesOldUnlocked,
+or withFilesMaybeModified. Both of which Command.Add
+calls after withFilesNotInGit.
+
+With -J, I suppose there could be worker threads still running
+to ingest withFilesNotInGit when it moves on to
+withFilesOldUnlocked.
+
+There is a window during file ingestion where the file has been
+removed from the working tree and the annex symlink has not been
+created yet. Probably that is triggering checkFileOrDirectoryExists.
+Although I'd expect that window to be small, so it's somewhat surprising
+that yoh could reproduce this problem multiple times.
+
+The problem could be fixed in several different ways. Could wait for
+worker threads to finish before moving on to the next `with*` seek.
+Could make checkFileOrDirectoryExists only be run once, rather than 3
+times in `git annex add` (which is surely unncessary work..). Or could
+try to eliminate the window where the file is not present in the working
+tree. It may be worth doing several of those.
+"""]]
diff --git a/doc/bugs/add_-J_fails_with_not_found/comment_2_441ded7e239f26ab3a27a2c43a6d0856._comment b/doc/bugs/add_-J_fails_with_not_found/comment_2_441ded7e239f26ab3a27a2c43a6d0856._comment
new file mode 100644
index 000000000..5a1f10618
--- /dev/null
+++ b/doc/bugs/add_-J_fails_with_not_found/comment_2_441ded7e239f26ab3a27a2c43a6d0856._comment
@@ -0,0 +1,44 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 2"""
+ date="2017-10-16T16:47:57Z"
+ content="""
+Here's a way to reproduce it reliably:
+
+ joey@darkstar:~/tmp/tt>for x in $(seq 1 25); do echo $x > $x.f;done
+ joey@darkstar:~/tmp/tt>git annex add --json -J6 *.f
+ {"command":"add","success":true,"key":"SHA256E-s2--4355a46b19d348dc2f57c046f8ef63d4538ebb936000f3c9ee954a27460dd865.f","file":"1.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--917df3320d778ddbaa5c5c7742bc4046bf803c36ed2b050f30844ed206783469.f","file":"10.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--a1fb50e6c86fae1679ef3351296fd6713411a08cf8dd1790a4fd05fae8688164.f","file":"12.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--1a252402972f6057fa53cc172b52b9ffca698e18311facd0f3b06ecaaef79e17.f","file":"13.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--9a92adbc0cee38ef658c71ce1b1bf8c65668f166bfb213644c895ccb1ad07a25.f","file":"14.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--25d4f2a86deb5e2574bb3210b67bb24fcc4afb19f93a7b65a057daa874a9d18e.f","file":"11.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--238903180cc104ec2c5d8b3f20c5bc61b389ec0a967df8cc208cdc7cd454174f.f","file":"15.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--e6c21e8d260fe71882debdb339d2402a2ca7648529bc2303f48649bce0380017.f","file":"16.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--54183f4323f377b737433a1e98229ead0fdc686f93bab057ecb612daa94002b5.f","file":"17.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--7ee29791fc17e986b97128845622b077fb45e349fdb80523fac9dba879b4ad60.f","file":"18.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--a9742eb8ee320e006666aef25ae9aeed948247f3125c9cafa7cf97b7e7467dd5.f","file":"19.f"}
+ {"command":"add","success":true,"key":"SHA256E-s2--53c234e5e8472b6ac51c1ae1cab3fe06fad053beb8ebfd8977b010655bfdd3c3.f","file":"2.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--5378796307535df3ec8d8b15a2e2dc5641419c3d3060cfe32238c0fa973f7aa3.f","file":"20.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--6e2ae11dad0616f66bbb2b6e6556f580bb987fd911d7132aa6bee2bfc7cc7b52.f","file":"21.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--f14b4987904bcb5814e4459a057ed4d20f58a633152288a761214dcd28780b56.f","file":"22.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--076320a2a08267b4c026d06573bba408ea68841e73cdc20e62cce59de165ece3.f","file":"23.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--68ca3fba3b7e864770cb61aeb306d4bd4354b68ab4dd38450860c5d823e42a53.f","file":"24.f"}
+ {"command":"add","success":true,"key":"SHA256E-s3--64aeb9975f234becd55bb4635e6e2f2da7a6b7bf0a896f0c07763bdfbfb31420.f","file":"25.f"}
+ {"command":"add","success":true,"key":"SHA256E-s2--06e9d52c1720fca412803e3b07c4b228ff113e303f4c7ab94665319d832bbfb7.f","file":"6.f"}
+ {"command":"add","success":true,"key":"SHA256E-s2--aa67a169b0bba217aa0aa88a65346920c84c42447c36ba5f7ea65f422c1fe5d8.f","file":"8.f"}
+ {"command":"add","success":true,"key":"SHA256E-s2--f0b5c2c2211c8d67ed15e75e656c7862d086e9245420892a7de62cd9ec582a06.f","file":"5.f"}
+ git-annex: 9.f not found
+ {"command":"add","success":true,"key":"SHA256E-s2--10159baf262b43a92d95db59dae1f72c645127301661e0a3ce4e38b295a97c58.f","file":"7.f"}
+ {"command":"add","success":true,"key":"SHA256E-s2--7de1555df0c2700329e815b93b32c571c3ea54dc967b89e81ab73b9972b72d1d.f","file":"4.f"}
+ {"command":"add","success":true,"key":"SHA256E-s2--1121cfccd5913f0a63fec40a6ffd44ea64f9dc135c66634ba001d10bcf4302a2.f","file":"3.f"}
+ {"command":"add","success":true,"key":"SHA256E-s2--2e6d31a5983a91251bfae5aefa1c0a19d8ba3cf601d0e8a706b4cfa9661a6b8a.f","file":"9.f"}
+ git-annex: add: 1 failed
+
+Which file it fails on varies, but it fails on one of the files
+every time I've tried this.
+
+And, there's always a "success" line printed for the same file after
+it failed on it. That confirms my hypothesis that the worker thread is
+still running at the same time checkFileOrDirectoryExists gets run.
+"""]]
diff --git a/doc/bugs/add_-J_fails_with_not_found/comment_3_9749c2722b5520e13448128b68fcfaed._comment b/doc/bugs/add_-J_fails_with_not_found/comment_3_9749c2722b5520e13448128b68fcfaed._comment
new file mode 100644
index 000000000..b2ecb4c81
--- /dev/null
+++ b/doc/bugs/add_-J_fails_with_not_found/comment_3_9749c2722b5520e13448128b68fcfaed._comment
@@ -0,0 +1,20 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 2"""
+ date="2017-10-16T16:25:39Z"
+ content="""
+It would be good to avoid the window where the file is not present in the
+working tree during ingestion, because interupting `git annex add` during
+that window causes the file to go missing, with no record of it yet in git.
+
+I tried making the window longer by adding a 10 second sleep, and indeed
+interrupting `git annex add` during the window is *bad*.
+
+Looking at the code, `makeLink` uses `replaceFile` to atomicallty
+replace the file with the symlink. But `ingestAdd` for some reason
+calls `nukeFile` before `makeLink`. I could not find any good reason
+for it to do that. So, I've removed the `nukeFile`, closing the window.
+
+That change also fixed the "file not found" error. But I'm not sure
+if it's entirely dealt with the problems raised by this bug report..
+"""]]
diff --git a/doc/bugs/get_-J___34__fails__34___to_get_files_with_the_same_key/comment_5_525ac8695b6253fc435112f336238c41._comment b/doc/bugs/get_-J___34__fails__34___to_get_files_with_the_same_key/comment_5_525ac8695b6253fc435112f336238c41._comment
new file mode 100644
index 000000000..ace650c10
--- /dev/null
+++ b/doc/bugs/get_-J___34__fails__34___to_get_files_with_the_same_key/comment_5_525ac8695b6253fc435112f336238c41._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 5"""
+ date="2017-10-16T16:06:50Z"
+ content="""
+The add problem is clearly an entirely unrelated problem. Opened this
+bug report for it: <add_-J_fails_with_not_found>
+"""]]