summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2017-03-08 14:49:30 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2017-03-08 14:49:30 -0400
commitd4ee2d7345b03917bbf543a42c245d379dba0d5a (patch)
treee1500bd6be6b36dd875f7c38c1fcd271b185159a
parent9fcaecb288d7eb1eeafd1603a6aa44e62063769f (diff)
get -J: Improve distribution of jobs amoung remotes when there are more jobs than remotes.
It was distributing jobs to remotes that were not being used by any other job. But, suppose that there are only 2 remotes, and -J10. In such a case, the first 2 downloads would be distributed amoung the 2 remotes, but the other 8 would all go to remote #1. Improved by keeping a counter of how many jobs are assigned to a remote, and prefer remotes with fewer jobs. Note use of Data.Map.Strict to avoid blowing up space. I kept the bang-patterns as-is, although probably not needed with Data.Map.Strict. This commit was sponsored by Jack Hill on Patreon.
-rw-r--r--Annex.hs4
-rw-r--r--Annex/Transfer.hs18
-rw-r--r--CHANGELOG2
3 files changed, 13 insertions, 11 deletions
diff --git a/Annex.hs b/Annex.hs
index 1ee6e837f..95709faec 100644
--- a/Annex.hs
+++ b/Annex.hs
@@ -136,7 +136,7 @@ data AnnexState = AnnexState
, existinghooks :: M.Map Git.Hook.Hook Bool
, desktopnotify :: DesktopNotify
, workers :: [Either AnnexState (Async AnnexState)]
- , activeremotes :: MVar (S.Set (Types.Remote.RemoteA Annex))
+ , activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer)
, keysdbhandle :: Maybe Keys.DbHandle
, cachedcurrentbranch :: Maybe Git.Branch
, cachedgitenv :: Maybe [(String, String)]
@@ -144,7 +144,7 @@ data AnnexState = AnnexState
newState :: GitConfig -> Git.Repo -> IO AnnexState
newState c r = do
- emptyactiveremotes <- newMVar S.empty
+ emptyactiveremotes <- newMVar M.empty
return $ AnnexState
{ repo = r
, repoadjustment = return
diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs
index d6282cbf3..0b794b379 100644
--- a/Annex/Transfer.hs
+++ b/Annex/Transfer.hs
@@ -32,7 +32,8 @@ import qualified Types.Remote as Remote
import Types.Concurrency
import Control.Concurrent
-import qualified Data.Set as S
+import qualified Data.Map.Strict as M
+import Data.Ord
class Observable a where
observeBool :: a -> Bool
@@ -218,7 +219,7 @@ pickRemote l a = go l =<< Annex.getState Annex.concurrency
go rs (Concurrent n) | n > 1 = do
mv <- Annex.getState Annex.activeremotes
active <- liftIO $ takeMVar mv
- let rs' = sortBy (inactiveFirst active) rs
+ let rs' = sortBy (lessActiveFirst active) rs
goconcurrent mv active rs'
go (r:rs) _ = do
ok <- a r
@@ -229,11 +230,11 @@ pickRemote l a = go l =<< Annex.getState Annex.concurrency
liftIO $ putMVar mv active
return observeFailure
goconcurrent mv active (r:rs) = do
- let !active' = S.insert r active
+ let !active' = M.insertWith (+) r 1 active
liftIO $ putMVar mv active'
let getnewactive = do
active'' <- liftIO $ takeMVar mv
- let !active''' = S.delete r active''
+ let !active''' = M.update (\n -> if n > 1 then Just (n-1) else Nothing) r active''
return active'''
let removeactive = liftIO . putMVar mv =<< getnewactive
ok <- a r `onException` removeactive
@@ -246,11 +247,10 @@ pickRemote l a = go l =<< Annex.getState Annex.concurrency
-- Re-sort the remaining rs
-- because other threads could have
-- been assigned them in the meantime.
- let rs' = sortBy (inactiveFirst active'') rs
+ let rs' = sortBy (lessActiveFirst active'') rs
goconcurrent mv active'' rs'
-inactiveFirst :: S.Set Remote -> Remote -> Remote -> Ordering
-inactiveFirst active a b
- | Remote.cost a == Remote.cost b =
- if a `S.member` active then GT else LT
+lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering
+lessActiveFirst active a b
+ | Remote.cost a == Remote.cost b = comparing (`M.lookup` active) a b
| otherwise = compare a b
diff --git a/CHANGELOG b/CHANGELOG
index d6297bb79..012f32d0e 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -7,6 +7,8 @@ git-annex (6.20170301.2) UNRELEASED; urgency=medium
so any system ssh will be preferred over it.
* assistant: Add 1/200th second delay between checking each file
in the full transfer scan, to avoid using too much CPU.
+ * get -J: Improve distribution of jobs amoung remotes when there are more
+ jobs than remotes.
-- Joey Hess <id@joeyh.name> Thu, 02 Mar 2017 12:51:40 -0400