summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2015-11-13 16:13:43 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2015-11-13 16:18:44 -0400
commit2d0f11008a1381f6212d0ab1f33e26fb1ae22f79 (patch)
tree8309823e1d670d23bcfe30da582344c67c1dbd6b
parentfe43d0f35cc3d2df945ef97dfc979434d5f1e4be (diff)
starting to get a handle on how to detect that mad gleam in lustre's eye
-rw-r--r--Utility/LockFile/PidLock.hs38
-rw-r--r--doc/bugs/git-annex_doesn__39__t_work_on_lustre:_waitToSetLock:_unsupported_operation___40__Function_not_implemented__41__/comment_14_4dea6eac389bbf5235a3d5d3378e6d04._comment33
2 files changed, 55 insertions, 16 deletions
diff --git a/Utility/LockFile/PidLock.hs b/Utility/LockFile/PidLock.hs
index 5c5a89cc7..206127efb 100644
--- a/Utility/LockFile/PidLock.hs
+++ b/Utility/LockFile/PidLock.hs
@@ -132,27 +132,12 @@ tryLock lockfile = trySideLock lockfile $ \sidelock -> do
-- open(2) suggests that link can sometimes appear to fail
-- on NFS but have actually succeeded, and the way to find out is to stat
-- the file and check its link count etc.
---
--- On a Lustre filesystem, link has been observed to incorrectly *succeed*,
--- despite the dest already existing. A subsequent stat of the dest
--- looked like it had been replaced with the src. The process proceeded to
--- run and then deleted the dest, and after the process was done, the
--- original file was observed to still be in place. This is horrible and we
--- can't do anything about such a lying filesystem.
--- At least the side lock file will prevent git-annex's running on the same
--- host from running concurrently even on such a lying filesystem.
linkToLock :: SideLockHandle -> FilePath -> FilePath -> IO Bool
linkToLock Nothing _ _ = return False
linkToLock (Just _) src dest = do
- -- This might make Lustre notice that a lock file that is already
- -- there is there?
- _ <- catchMaybeIO $ readFile dest
_ <- tryIO $ createLink src dest
ifM (catchBoolIO checklinked)
- ( catchBoolIO $ do
- srccontent <- readFile src
- destcontent <- readFile dest
- return (srccontent == destcontent)
+ ( catchBoolIO $ not <$> checkInsaneLustre dest
, return False
)
where
@@ -173,6 +158,27 @@ linkToLock (Just _) src dest = do
, linkCount x == 2
]
+-- On a Lustre filesystem, link has been observed to incorrectly *succeed*,
+-- despite the dest already existing. A subsequent stat of the dest
+-- looked like it had been replaced with the src. The process proceeded to
+-- run and then deleted the dest, and after the process was done, the
+-- original file was observed to still be in place.
+--
+-- We can detect this insanity by getting the directory contents after
+-- making the link, and checking to see if 2 copies of the dest file,
+-- with the SAME FILENAME exist.
+checkInsaneLustre :: FilePath -> IO Bool
+checkInsaneLustre dest = do
+ fs <- dirContents (takeDirectory dest)
+ case length (filter (== dest) fs) of
+ 1 -> return False -- whew!
+ 0 -> return True -- wtf?
+ _ -> do
+ -- Try to clean up the extra copy we made
+ -- that has the same name. Egads.
+ tryIO $ removeFile dest
+ return True
+
-- | Waits as necessary to take a lock.
--
-- Uses a 1 second wait-loop.
diff --git a/doc/bugs/git-annex_doesn__39__t_work_on_lustre:_waitToSetLock:_unsupported_operation___40__Function_not_implemented__41__/comment_14_4dea6eac389bbf5235a3d5d3378e6d04._comment b/doc/bugs/git-annex_doesn__39__t_work_on_lustre:_waitToSetLock:_unsupported_operation___40__Function_not_implemented__41__/comment_14_4dea6eac389bbf5235a3d5d3378e6d04._comment
new file mode 100644
index 000000000..5bd76b6ec
--- /dev/null
+++ b/doc/bugs/git-annex_doesn__39__t_work_on_lustre:_waitToSetLock:_unsupported_operation___40__Function_not_implemented__41__/comment_14_4dea6eac389bbf5235a3d5d3378e6d04._comment
@@ -0,0 +1,33 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 14"""
+ date="2015-11-13T20:00:48Z"
+ content="""
+Adding to the crazy Lustre fun, check this out:
+
+ $ ls -l .git/annex/
+ total 56
+ -rw-rw-r-- 1 hess root 18387 Nov 13 14:35 index
+ -rw-rw-r-- 1 hess root 41 Nov 13 14:35 index.lck
+ drwxrwsr-x 2 hess root 12288 Nov 13 14:35 journal
+ -rw-rw-r-- 1 hess root 0 Nov 13 11:48 journal.lck
+ drwxrwsr-x 2 hess root 4096 Nov 13 14:35 misctmp
+ drwxrwsr-x 88 hess root 4096 Nov 13 14:35 objects
+ -r--r--r-- 1 hess root 70 Nov 13 14:35 pidlock
+ -r--r--r-- 1 hess root 70 Nov 13 14:35 pidlock
+ -rw-rw-r-- 1 hess root 0 Nov 13 11:48 sentinal
+ -rw-rw-r-- 1 hess root 23 Nov 13 11:48 sentinal.cache
+
+There are 2 pidlock files in that directory listing. 2 files with the same name.
+I deleted one of them, and with no other changes, ls shows only 1 now.
+
+ -r--r--r-- 1 hess root 74 Nov 13 14:35 pidlock
+
+Notice that the file stat has changed too.
+
+So, Lustre has clearly thrown POSIX out the window, and then defenestrated
+sanity for good measure.
+
+On the plus side, this may show how I can detect when rename() fails to
+preserve POSIX semantics.
+"""]]