summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Command/Sync.hs38
-rw-r--r--debian/changelog3
-rw-r--r--doc/git-annex-sync.mdwn10
-rw-r--r--doc/todo/Support_--jobs_option_for___39__sync_--content__39__.mdwn15
4 files changed, 51 insertions, 15 deletions
diff --git a/Command/Sync.hs b/Command/Sync.hs
index 9a2417568..46a03a4de 100644
--- a/Command/Sync.hs
+++ b/Command/Sync.hs
@@ -52,9 +52,10 @@ import Control.Concurrent.MVar
import qualified Data.Map as M
cmd :: Command
-cmd = command "sync" SectionCommon
- "synchronize local repository with remotes"
- (paramRepeating paramRemote) (seek <$$> optParser)
+cmd = withGlobalOptions [jobsOption] $
+ command "sync" SectionCommon
+ "synchronize local repository with remotes"
+ (paramRepeating paramRemote) (seek <$$> optParser)
data SyncOptions = SyncOptions
{ syncWith :: CmdParams
@@ -102,7 +103,8 @@ seek o = do
-- Syncing involves many actions, any of which can independently
-- fail, without preventing the others from running.
- seekActions $ return $ concat
+ -- These actions cannot be run concurrently.
+ mapM_ includeCommandAction $ concat
[ [ commit o ]
, [ withbranch mergeLocal ]
, map (withbranch . pullRemote) gitremotes
@@ -115,14 +117,14 @@ seek o = do
-- branch on the remotes in the meantime, so pull
-- and merge again to avoid our push overwriting
-- those changes.
- seekActions $ return $ concat
+ mapM_ includeCommandAction $ concat
[ map (withbranch . pullRemote) gitremotes
, [ commitAnnex, mergeAnnex ]
]
- seekActions $ return $ concat
- [ [ withbranch pushLocal ]
- , map (withbranch . pushRemote) gitremotes
- ]
+
+ void $ includeCommandAction $ withbranch pushLocal
+ -- Pushes to remotes can run concurrently.
+ mapM_ (commandAction . withbranch . pushRemote) gitremotes
{- Merging may delete the current directory, so go to the top
- of the repo. This also means that sync always acts on all files in the
@@ -380,7 +382,9 @@ newer remote b = do
- This ensures that preferred content expressions that match on
- filenames work, even when in --all mode.
-
- - If any file movements were generated, returns true.
+ - Returns true if any file transfers were made.
+ -
+ - When concurrency is enabled, files are processed concurrently.
-}
seekSyncContent :: SyncOptions -> [Remote] -> Annex Bool
seekSyncContent o rs = do
@@ -392,15 +396,17 @@ seekSyncContent o rs = do
(seekkeys mvar bloom)
(const noop)
[]
+ finishCommandActions
liftIO $ not <$> isEmptyMVar mvar
where
seekworktree mvar l bloomfeeder = seekHelper LsFiles.inRepo l >>=
mapM_ (\f -> ifAnnexed f (go (Right bloomfeeder) mvar (Just f)) noop)
seekkeys mvar bloom getkeys =
mapM_ (go (Left bloom) mvar Nothing) =<< getkeys
- go ebloom mvar af k = do
- void $ liftIO $ tryPutMVar mvar ()
- syncFile ebloom rs af k
+ go ebloom mvar af k = commandAction $ do
+ whenM (syncFile ebloom rs af k) $
+ void $ liftIO $ tryPutMVar mvar ()
+ return Nothing
{- If it's preferred content, and we don't have it, get it from one of the
- listed remotes (preferring the cheaper earlier ones).
@@ -412,8 +418,10 @@ seekSyncContent o rs = do
-
- Drop it from each remote that has it, where it's not preferred content
- (honoring numcopies).
+ -
+ - Returns True if any file transfers were made.
-}
-syncFile :: Either (Maybe (Bloom Key)) (Key -> Annex ()) -> [Remote] -> AssociatedFile -> Key -> Annex ()
+syncFile :: Either (Maybe (Bloom Key)) (Key -> Annex ()) -> [Remote] -> AssociatedFile -> Key -> Annex Bool
syncFile ebloom rs af k = do
locs <- loggedLocations k
let (have, lack) = partition (\r -> Remote.uuid r `elem` locs) rs
@@ -443,6 +451,8 @@ syncFile ebloom rs af k = do
-- the sync failed.
handleDropsFrom locs' rs "unwanted" True k af
Nothing callCommandAction
+
+ return (got || not (null putrs))
where
wantget have = allM id
[ pure (not $ null have)
diff --git a/debian/changelog b/debian/changelog
index 06c6a8c38..d54548047 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -5,6 +5,9 @@ git-annex (5.20150813) UNRELEASED; urgency=medium
non-data-transfer overhead 6x.
* --debug is passed along to git-annex-shell when git-annex is in debug mode.
* Added WHEREIS to external special remote protocol.
+ * sync: Support --jobs
+ * sync --content: Avoid unnecessary second pull from remotes when
+ no file transfers are made.
-- Joey Hess <id@joeyh.name> Wed, 12 Aug 2015 14:31:01 -0400
diff --git a/doc/git-annex-sync.mdwn b/doc/git-annex-sync.mdwn
index 2f7180546..b4c23f843 100644
--- a/doc/git-annex-sync.mdwn
+++ b/doc/git-annex-sync.mdwn
@@ -65,6 +65,16 @@ by running "git annex sync" on the remote.
will only match the version of files currently in the work tree, but not
past versions of files.
+* `--jobs=N` `-JN`
+
+ Enables parallel syncing with up to the specified number of jobs
+ running at once. For example: `-J10`
+
+ When there are multiple git remotes, pushes will be made to them in
+ parallel. Pulls are not done in parallel because that tends to be
+ less efficient. When `--content` is used, the files are also
+ processed in parallel.
+
# SEE ALSO
[[git-annex]](1)
diff --git a/doc/todo/Support_--jobs_option_for___39__sync_--content__39__.mdwn b/doc/todo/Support_--jobs_option_for___39__sync_--content__39__.mdwn
index cabff5ffc..9923dcff6 100644
--- a/doc/todo/Support_--jobs_option_for___39__sync_--content__39__.mdwn
+++ b/doc/todo/Support_--jobs_option_for___39__sync_--content__39__.mdwn
@@ -1 +1,14 @@
-As the subject says. I mostly use `git annex sync --content` to transfer files between repositories, as its easier than running `git annex sync`, a bunch of `git annex copy`s and then a `git annex get` to make sure I have all the files I should have. It would be good if the shortcut could also work in parallel.
+As the subject says. I mostly use `git annex sync --content` to transfer
+files between repositories, as it's easier than running `git annex sync`, a
+bunch of `git annex copy`s and then a `git annex get` to make sure I have
+all the files I should have. It would be good if the shortcut could also
+work in parallel.
+
+> It can also be faster to push concurrently. OTOH, concurrent pulls
+> can lead to the same git objects being downloaded redundantly, so best to
+> avoid those I think.
+>
+> I've implemented this. It suffers from the same
+> lack of support for displaying progress when running in parallel as
+> documented on [[parallel_get]]. Other than that wart, this is [[done]].
+> --[[Joey]]