From 9f3d75f06c2d6f4ed6b86fb23f8dec52d8d53b85 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 13 Nov 2015 13:22:45 -0400 Subject: add stat check I have a strace taken on a lustre filesystem on which link() returned 0, but didn't actually succeed, since the file already existed. One of the linux man pages recommended using link followed by checking like this. I was reading it yesterday, but cannot find it now. --- Utility/LockFile/PidLock.hs | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'Utility') diff --git a/Utility/LockFile/PidLock.hs b/Utility/LockFile/PidLock.hs index d1ee6502a..4caf5a06b 100644 --- a/Utility/LockFile/PidLock.hs +++ b/Utility/LockFile/PidLock.hs @@ -92,7 +92,7 @@ tryLock lockfile = trySideLock lockfile $ \sidelock -> do nukeFile tmp return Nothing let tooklock = return $ Just $ LockHandle lockfile fd sidelock - ifM (isJust <$> catchMaybeIO (createLink tmp lockfile)) + ifM (linkToLock tmp lockfile) ( do nukeFile tmp tooklock @@ -111,6 +111,33 @@ tryLock lockfile = trySideLock lockfile $ \sidelock -> do _ -> failedlock ) +-- Linux man pages recommend linking a pid lock into place, +-- as the most portable atomic operation that will fail if +-- it already exists. However, on some network filesystems, +-- link will return success sometimes despite having failed, +-- so we have to stat both files to check if it actually worked. 
-- Attempt to hard link src to dest, and report whether the link was
-- really made.
--
-- A successful return from createLink is not trusted on its own: both
-- paths are then statted (without following symlinks) and must agree
-- on every compared field before True is returned. A link attempt
-- that catchMaybeIO reports as Nothing yields False, and the stat
-- comparison falls back to False through catchDefaultIO (presumably
-- when one of the stat calls throws an IO exception).
linkToLock :: FilePath -> FilePath -> IO Bool
linkToLock src dest = do
    linked <- catchMaybeIO (createLink src dest)
    case linked of
        Nothing -> return False
        Just _ -> catchDefaultIO False statsAgree
  where
    -- Stat both names and compare them field by field; if the link
    -- really happened they refer to the same file, so nothing differs.
    statsAgree = do
        srcstat <- getSymbolicLinkStatus src
        deststat <- getSymbolicLinkStatus dest
        return $
            deviceID srcstat == deviceID deststat
                && fileID srcstat == fileID deststat
                && fileMode srcstat == fileMode deststat
                && linkCount srcstat == linkCount deststat
                && fileOwner srcstat == fileOwner deststat
                && fileGroup srcstat == fileGroup deststat
                && specialDeviceID srcstat == specialDeviceID deststat
                && fileSize srcstat == fileSize deststat
                && modificationTime srcstat == modificationTime deststat
                && isRegularFile srcstat == isRegularFile deststat

-- | Waits as necessary to take a lock.
--
-- Uses a 1 second wait-loop.

-- cgit v1.2.3