From 6847c7edc5224aaaa62b68d7bca0a4e5c0297fbe Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 12 May 2015 20:11:23 -0400 Subject: Stale transfer lock and info files will be cleaned up automatically when get/unused/info commands are run. Deleting lock files is tricky, tricky stuff. I think I got it right! --- Annex/Transfer.hs | 2 +- Logs/Transfer.hs | 26 ++++++++++++++++++++++---- debian/changelog | 2 ++ doc/bugs/transfer_lock_file_removal_bug.mdwn | 26 ++++++++++++++++++++++++++ 4 files changed, 51 insertions(+), 5 deletions(-) diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs index d8f19eb8d..2511ae436 100644 --- a/Annex/Transfer.hs +++ b/Annex/Transfer.hs @@ -71,7 +71,7 @@ runTransfer' ignorelock t file shouldretry transferobserver transferaction = do (lck, inprogress) <- liftIO $ prep tfile mode info if inprogress && not ignorelock then do - showNote "transfer already in progress" + showNote "transfer already in progress, or unable to take transfer lock" return False else do ok <- retry info metervar $ transferaction meter diff --git a/Logs/Transfer.hs b/Logs/Transfer.hs index 0ee69b63f..69b160331 100644 --- a/Logs/Transfer.hs +++ b/Logs/Transfer.hs @@ -123,17 +123,35 @@ startTransferInfo file = TransferInfo <*> pure file <*> pure False -{- If a transfer is still running, returns its TransferInfo. -} +{- If a transfer is still running, returns its TransferInfo. + - + - If no transfer is running, attempts to clean up the stale + - lock and info files. This can happen if a transfer process was + - interrupted. + -} checkTransfer :: Transfer -> Annex (Maybe TransferInfo) checkTransfer t = do tfile <- fromRepo $ transferFile t + let cleanstale = do + void $ tryIO $ removeFile tfile + void $ tryIO $ removeFile $ transferLockFile tfile #ifndef mingw32_HOST_OS liftIO $ do - v <- getLockStatus (transferLockFile tfile) + let lck = transferLockFile tfile + v <- getLockStatus lck case v of Just (pid, _) -> catchDefaultIO Nothing $ readTransferInfoFile (Just pid) tfile - Nothing -> return Nothing + Nothing -> do + -- Take a non-blocking lock while deleting + -- the stale lock file. + r <- tryLockExclusive Nothing lck + case r of + Just lockhandle -> do + cleanstale + dropLock lockhandle + _ -> noop + return Nothing #else v <- liftIO $ lockShared $ transferLockFile tfile liftIO $ case v of @@ -141,7 +159,7 @@ checkTransfer t = do readTransferInfoFile Nothing tfile Just lockhandle -> do dropLock lockhandle - void $ tryIO $ removeFile $ transferLockFile tfile + cleanstale return Nothing #endif diff --git a/debian/changelog b/debian/changelog index 746706ba1..6b5c8dff9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -16,6 +16,8 @@ git-annex (5.20150508.2) UNRELEASED; urgency=medium being used. * Fix an unlikely race that could result in two transfers of the same key running at once. + * Stale transfer lock and info files will be cleaned up automatically + when get/unused/info commands are run. -- Joey Hess Mon, 11 May 2015 12:45:06 -0400 diff --git a/doc/bugs/transfer_lock_file_removal_bug.mdwn b/doc/bugs/transfer_lock_file_removal_bug.mdwn index ca698b231..021ce08a6 100644 --- a/doc/bugs/transfer_lock_file_removal_bug.mdwn +++ b/doc/bugs/transfer_lock_file_removal_bug.mdwn @@ -33,3 +33,29 @@ Oh BTW, this race cannot happen on windows, since on windows, an open lock file cannot be deleted by another process. > [[fixed|done]] + +Let's also consider if this change will allow for safely expiring stale +lock files.. + +Situation before with expiry would have been buggy (which is why it never +tried to expire them): + +1. first process creates lock file, takes lock, is interrupted (no more + lock) +2. second process wants to delete stale lock file; checks if it's locked; + isn't +3. third process opens existing lock file, prepares to take lock +4. second process deletes lock file +5. third process takes lock +6. third process is now doing a transfer w/o a (visible) lock + +But, after this bug fix, the third process checks if it's lock file +exists after taking the lock. Since the second process deleted it, +the third process fails with an error "transfer in progress" +which is perhaps not accurate, but at least it stopped. + +For this to work though, the second process needs to actually take +the lock, in non-blocking mode. If it succeeds, it can keep the lock +held while deleting the lock file. This ensures that when the third process +takes the lock, the lock file will already be deleted by the time it checks +if it's there. -- cgit v1.2.3