diff options
-rw-r--r-- | Annex/Transfer.hs | 4 | ||||
-rw-r--r-- | CHANGELOG | 2 | ||||
-rw-r--r-- | Utility/LockPool/STM.hs | 30 | ||||
-rw-r--r-- | doc/bugs/parallel_get_can_fail_some_downloads_and_require_re-getting_/comment_3_6674e4dbc7437ce941bcef6272c3433b._comment | 40 |
4 files changed, 60 insertions, 16 deletions
diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs index 87480b2f1..3fcf1a1b9 100644 --- a/Annex/Transfer.hs +++ b/Annex/Transfer.hs @@ -118,7 +118,9 @@ runTransfer' ignorelock t afile shouldretry transferaction = checkSecureHashes t void $ liftIO $ tryIO $ writeTransferInfoFile info tfile return (Just lockhandle, False) - , return (Nothing, True) + , do + liftIO $ dropLock lockhandle + return (Nothing, True) ) #else prep tfile _mode info = catchPermissionDenied (const prepfailed) $ do @@ -4,6 +4,8 @@ git-annex (6.20170520) UNRELEASED; urgency=medium exclamation mark, which forces gpg to use a specific subkey. * Improve progress display when watching file size, in cases where a transfer does not resume. + * Fix transfer log file locking problem when running concurrent + transfers. -- Joey Hess <id@joeyh.name> Wed, 24 May 2017 14:03:40 -0400 diff --git a/Utility/LockPool/STM.hs b/Utility/LockPool/STM.hs index d1ee0dbaf..5cb7b8834 100644 --- a/Utility/LockPool/STM.hs +++ b/Utility/LockPool/STM.hs @@ -49,9 +49,9 @@ type LockPool = TMVar (M.Map LockFile LockStatus) -- A shared global variable for the lockPool. Avoids callers needing to -- maintain state for this implementation detail. +{-# NOINLINE lockPool #-} lockPool :: LockPool lockPool = unsafePerformIO (newTMVarIO M.empty) -{-# NOINLINE lockPool #-} -- Updates the LockPool, blocking as necessary if another thread is holding -- a conflicting lock. @@ -62,23 +62,23 @@ lockPool = unsafePerformIO (newTMVarIO M.empty) -- Keeping the whole Map in a TMVar accomplishes this, at the expense of -- sometimes retrying after unrelated changes in the map. waitTakeLock :: LockPool -> LockFile -> LockMode -> STM LockHandle -waitTakeLock pool file mode = do - m <- takeTMVar pool - v <- case M.lookup file m of - Just (LockStatus mode' n closelockfile) - | mode == LockShared && mode' == LockShared -> - return $ LockStatus mode (succ n) closelockfile - | n > 0 -> retry -- wait for lock - _ -> return $ LockStatus mode 1 noop - putTMVar pool (M.insert file v m) - newTMVar (pool, file) +waitTakeLock pool file mode = maybe retry return =<< tryTakeLock pool file mode -- Avoids blocking if another thread is holding a conflicting lock. tryTakeLock :: LockPool -> LockFile -> LockMode -> STM (Maybe LockHandle) -tryTakeLock pool file mode = - (Just <$> waitTakeLock pool file mode) - `orElse` - return Nothing +tryTakeLock pool file mode = do + m <- takeTMVar pool + let success v = do + putTMVar pool (M.insert file v m) + Just <$> newTMVar (pool, file) + case M.lookup file m of + Just (LockStatus mode' n closelockfile) + | mode == LockShared && mode' == LockShared -> + success $ LockStatus mode (succ n) closelockfile + | n > 0 -> do + putTMVar pool m + return Nothing + _ -> success $ LockStatus mode 1 noop -- Call after waitTakeLock or tryTakeLock, to register a CloseLockFile -- action to run when releasing the lock. diff --git a/doc/bugs/parallel_get_can_fail_some_downloads_and_require_re-getting_/comment_3_6674e4dbc7437ce941bcef6272c3433b._comment b/doc/bugs/parallel_get_can_fail_some_downloads_and_require_re-getting_/comment_3_6674e4dbc7437ce941bcef6272c3433b._comment new file mode 100644 index 000000000..a004b75b9 --- /dev/null +++ b/doc/bugs/parallel_get_can_fail_some_downloads_and_require_re-getting_/comment_3_6674e4dbc7437ce941bcef6272c3433b._comment @@ -0,0 +1,40 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2017-05-25T19:08:59Z" + content=""" +That looks like concurrent `git config` setting remote.origin.annex-uuid +are failing. + +I have not reproduced the `.git/config` error, but with a local +clone of a repository, I have been able to reproduce some intermittent +"transfer already in progress, or unable to take transfer lock" failures +with `git annex get -J5`, happening after remote.origin.annex-uuid has been +cached. + +So, two distinct bugs I think.. + +--- + +Debugging, the lock it fails to take always seems to be the lock on +the remote side, which points to the local clone being involved somehow. + +Debugging further, Utility.LockPool.STM.tryTakeLock is what's failing. +That's supposed to only fail when another thread holds a conflicting lock, +but as it's implemented with `orElse`, if the main STM +transaction retries due to other STM activity on the same TVar, +it will give up when it shouldn't. + +That's probably why this is happening under heavier concurrency loads; +it makes that failure case much more likely. And with a local clone, +twice as much locking is done. + +I've fixed this part of it! + +--- + +The concurrent `git config` part remains. +Since git-annex can potentially have multiple threads doing different `git +config` for their own reasons concurrently, it seems it will need to add +its own locking around that. +"""]] |