aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Annex.hs100
-rw-r--r--Annex/Transfer.hs61
-rw-r--r--CHANGELOG2
-rw-r--r--Command/Get.hs19
-rw-r--r--doc/todo/get_round_robin.mdwn9
-rw-r--r--doc/todo/wishlist__58___do_round_robin_downloading_of_data.mdwn3
6 files changed, 135 insertions, 59 deletions
diff --git a/Annex.hs b/Annex.hs
index 5ab2b748d..9bd6d34b8 100644
--- a/Annex.hs
+++ b/Annex.hs
@@ -77,7 +77,7 @@ import qualified Data.Set as S
- The MVar is not exposed outside this module.
-
- Note that when an Annex action fails and the exception is caught,
- - ny changes the action has made to the AnnexState are retained,
+ - any changes the action has made to the AnnexState are retained,
- due to the use of the MVar to store the state.
-}
newtype Annex a = Annex { runAnnex :: ReaderT (MVar AnnexState) IO a }
@@ -135,56 +135,60 @@ data AnnexState = AnnexState
, desktopnotify :: DesktopNotify
, workers :: [Either AnnexState (Async AnnexState)]
, concurrentjobs :: Maybe Int
+ , activeremotes :: MVar (S.Set (Types.Remote.RemoteA Annex))
, keysdbhandle :: Maybe Keys.DbHandle
, cachedcurrentbranch :: Maybe Git.Branch
}
-newState :: GitConfig -> Git.Repo -> AnnexState
-newState c r = AnnexState
- { repo = r
- , repoadjustment = return
- , gitconfig = c
- , backends = []
- , remotes = []
- , remoteannexstate = M.empty
- , output = def
- , force = False
- , fast = False
- , daemon = False
- , branchstate = startBranchState
- , repoqueue = Nothing
- , catfilehandles = M.empty
- , hashobjecthandle = Nothing
- , checkattrhandle = Nothing
- , checkignorehandle = Nothing
- , forcebackend = Nothing
- , globalnumcopies = Nothing
- , forcenumcopies = Nothing
- , limit = BuildingMatcher []
- , uuidmap = Nothing
- , preferredcontentmap = Nothing
- , requiredcontentmap = Nothing
- , forcetrust = M.empty
- , trustmap = Nothing
- , groupmap = Nothing
- , ciphers = M.empty
- , lockcache = M.empty
- , flags = M.empty
- , fields = M.empty
- , cleanup = M.empty
- , sentinalstatus = Nothing
- , useragent = Nothing
- , errcounter = 0
- , unusedkeys = Nothing
- , tempurls = M.empty
- , quviversion = Nothing
- , existinghooks = M.empty
- , desktopnotify = mempty
- , workers = []
- , concurrentjobs = Nothing
- , keysdbhandle = Nothing
- , cachedcurrentbranch = Nothing
- }
+newState :: GitConfig -> Git.Repo -> IO AnnexState
+newState c r = do
+ emptyactiveremotes <- newMVar S.empty
+ return $ AnnexState
+ { repo = r
+ , repoadjustment = return
+ , gitconfig = c
+ , backends = []
+ , remotes = []
+ , remoteannexstate = M.empty
+ , output = def
+ , force = False
+ , fast = False
+ , daemon = False
+ , branchstate = startBranchState
+ , repoqueue = Nothing
+ , catfilehandles = M.empty
+ , hashobjecthandle = Nothing
+ , checkattrhandle = Nothing
+ , checkignorehandle = Nothing
+ , forcebackend = Nothing
+ , globalnumcopies = Nothing
+ , forcenumcopies = Nothing
+ , limit = BuildingMatcher []
+ , uuidmap = Nothing
+ , preferredcontentmap = Nothing
+ , requiredcontentmap = Nothing
+ , forcetrust = M.empty
+ , trustmap = Nothing
+ , groupmap = Nothing
+ , ciphers = M.empty
+ , lockcache = M.empty
+ , flags = M.empty
+ , fields = M.empty
+ , cleanup = M.empty
+ , sentinalstatus = Nothing
+ , useragent = Nothing
+ , errcounter = 0
+ , unusedkeys = Nothing
+ , tempurls = M.empty
+ , quviversion = Nothing
+ , existinghooks = M.empty
+ , desktopnotify = mempty
+ , workers = []
+ , concurrentjobs = Nothing
+ , activeremotes = emptyactiveremotes
+ , keysdbhandle = Nothing
+ , cachedcurrentbranch = Nothing
+ }
{- Makes an Annex state object for the specified git repo.
- Ensures the config is read, if it was not already, and performs
@@ -193,7 +197,7 @@ new :: Git.Repo -> IO AnnexState
new r = do
r' <- Git.Config.read =<< Git.relPath r
let c = extractGitConfig r'
- newState c <$> fixupRepo r' c
+ newState c =<< fixupRepo r' c
{- Performs an action in the Annex monad from a starting state,
- returning a new state. -}
diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs
index a78d82ef3..6ed8ca761 100644
--- a/Annex/Transfer.hs
+++ b/Annex/Transfer.hs
@@ -1,11 +1,11 @@
{- git-annex transfers
-
- - Copyright 2012-2014 Joey Hess <id@joeyh.name>
+ - Copyright 2012-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
-{-# LANGUAGE CPP, FlexibleInstances #-}
+{-# LANGUAGE CPP, FlexibleInstances, BangPatterns #-}
module Annex.Transfer (
module X,
@@ -15,9 +15,11 @@ module Annex.Transfer (
alwaysRunTransfer,
noRetry,
forwardRetry,
+ pickRemote,
) where
import Annex.Common
+import qualified Annex
import Logs.Transfer as X
import Types.Transfer as X
import Annex.Notification as X
@@ -25,8 +27,10 @@ import Annex.Perms
import Utility.Metered
import Annex.LockPool
import Types.Remote (Verification(..))
+import qualified Types.Remote as Remote
import Control.Concurrent
+import qualified Data.Set as S
class Observable a where
observeBool :: a -> Bool
@@ -166,3 +170,56 @@ noRetry _ _ = False
- to send some data. -}
forwardRetry :: RetryDecider
forwardRetry old new = bytesComplete old < bytesComplete new
+
+{- Picks a remote from the list and tries a transfer to it. If the transfer
+ - does not succeed, goes on to try other remotes from the list.
+ -
+ - The list should already be ordered by remote cost, and is normally
+ - tried in order. However, when concurrent jobs are running, they will
+ - be assigned different remotes of the same cost when possible. This can
+ - increase total transfer speed.
+ -}
+pickRemote :: Observable v => [Remote] -> (Remote -> Annex v) -> Annex v
+pickRemote l a = go l =<< Annex.getState Annex.concurrentjobs
+ where
+ go [] _ = return observeFailure
+ go (r:[]) _ = a r
+ go rs (Just n) | n > 1 = do
+ mv <- Annex.getState Annex.activeremotes
+ active <- liftIO $ takeMVar mv
+ let rs' = sortBy (inactiveFirst active) rs
+ goconcurrent mv active rs'
+ go (r:rs) _ = do
+ ok <- a r
+ if observeBool ok
+ then return ok
+ else go rs Nothing
+ goconcurrent mv active [] = do
+ liftIO $ putMVar mv active
+ return observeFailure
+ goconcurrent mv active (r:rs) = do
+ let !active' = S.insert r active
+ liftIO $ putMVar mv active'
+ let getnewactive = do
+ active'' <- liftIO $ takeMVar mv
+ let !active''' = S.delete r active''
+ return active'''
+ let removeactive = liftIO . putMVar mv =<< getnewactive
+ ok <- a r `onException` removeactive
+ if observeBool ok
+ then do
+ removeactive
+ return ok
+ else do
+ active'' <- getnewactive
+ -- Re-sort the remaining rs
+ -- because other threads could have
+ -- been assigned them in the meantime.
+ let rs' = sortBy (inactiveFirst active'') rs
+ goconcurrent mv active'' rs'
+
+inactiveFirst :: S.Set Remote -> Remote -> Remote -> Ordering
+inactiveFirst active a b
+ | Remote.cost a == Remote.cost b =
+ if a `S.member` active then GT else LT
+ | otherwise = compare a b
diff --git a/CHANGELOG b/CHANGELOG
index 7139aaf59..0310dd3e2 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -11,6 +11,8 @@ git-annex (6.20160809) UNRELEASED; urgency=medium
Fixes a reversion introduced in version 5.20150727.
* Assistant, repair: Filter out git fsck lines about duplicate file
entries in tree objects.
+ * get -J, sync --content -J: Download different files from different
+ remotes when the remotes have the same costs.
-- Joey Hess <id@joeyh.name> Mon, 05 Sep 2016 11:51:49 -0400
diff --git a/Command/Get.hs b/Command/Get.hs
index 3d2914673..70b877065 100644
--- a/Command/Get.hs
+++ b/Command/Get.hs
@@ -89,16 +89,17 @@ getKey' key afile = dispatch
showNote "not available"
showlocs
return False
- dispatch remotes = notifyTransfer Download afile $ trycopy remotes remotes
- trycopy full [] _ = do
- Remote.showTriedRemotes full
- showlocs
- return False
- trycopy full (r:rs) witness =
- ifM (probablyPresent r)
- ( docopy r witness <||> trycopy full rs witness
- , trycopy full rs witness
+ dispatch remotes = notifyTransfer Download afile $ \witness -> do
+ ok <- pickRemote remotes $ \r -> ifM (probablyPresent r)
+ ( docopy r witness
+ , return False
)
+ if ok
+ then return ok
+ else do
+ Remote.showTriedRemotes remotes
+ showlocs
+ return False
showlocs = Remote.showLocations False key []
"No other repository is known to contain the file."
-- This check is to avoid an ugly message if a remote is a
diff --git a/doc/todo/get_round_robin.mdwn b/doc/todo/get_round_robin.mdwn
index 9fad9da9d..25c3a8644 100644
--- a/doc/todo/get_round_robin.mdwn
+++ b/doc/todo/get_round_robin.mdwn
@@ -20,3 +20,12 @@ it might not really be desirable when not run in parallel. In particular,
if A and B are on different spinning disks, then an access pattern of
A,B,A,B might keep the disks idle enough that they spin down in-between
access.
+
+> done for `git annex get -JN` where two remotes have the same cost.
+>
+> Also for `git annex sync --content -JN` when downloading and two remotes
+> have the same cost.
+>
+> Can't think of any other places that apply, except perhaps the assistant,
+> but it does its own much different queuing of transfers. [[done]]
+> --[[Joey]]
diff --git a/doc/todo/wishlist__58___do_round_robin_downloading_of_data.mdwn b/doc/todo/wishlist__58___do_round_robin_downloading_of_data.mdwn
index 2a283b3df..00777faa2 100644
--- a/doc/todo/wishlist__58___do_round_robin_downloading_of_data.mdwn
+++ b/doc/todo/wishlist__58___do_round_robin_downloading_of_data.mdwn
@@ -5,3 +5,6 @@ This of course assumes that we like the idea of "parallel" launching and running
This wish item is probably only useful for the paranoid people who store more than 1 copy of their data.
See [[get_round_robin]] --[[Joey]]
+
+> [[done]], at least for `git annex get -JN` when two remotes have the same
+> cost..