summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-01-19 17:35:36 -0400
committerGravatar Joey Hess <joey@kitenet.net>2014-01-19 17:49:54 -0400
commit6ddc6c944db103b75e007023b85137ba705179c3 (patch)
tree7e766a6d15263d58e74b98220962092ee03c3112
parent549dd5e114651ef5fc1156a4977aa18ad7a2c6d8 (diff)
sync --content: New option that makes the content of annexed files be transferred.
Similar to the assistant, this honors any configured preferred content expressions. I am not entirely happpy with the implementation. It would be nicer if the seek function returned a list of actions which included the individual file gets and copies and drops, rather than the current list of calls to syncContent. This would allow getting rid of the somewhat reundant display of "sync file [ok|failed]" after the get/put display. But, do that, withFilesInGit would need to somehow be able to construct such a mixed action list. And it would be less efficient than the current implementation, which is able to reuse several values between eg get and drop. Note that currently this does not try to satisfy numcopies when getting/putting files (numcopies are of course checked when dropping files!) This makes it like the assistant, and unlike get --auto and copy --auto, which do duplicate files when numcopies is not yet satisfied. I don't know if this is the right decision; it only seemed to make sense to have this parallel the assistant as far as possible to start with, since I know the assistant works. This commit was sponsored by Øyvind Andersen Holm.
-rw-r--r--Annex/Drop.hs118
-rw-r--r--Assistant/DaemonStatus.hs2
-rw-r--r--Assistant/Drop.hs99
-rw-r--r--Assistant/Threads/TransferScanner.hs2
-rw-r--r--Command/Get.hs6
-rw-r--r--Command/Sync.hs120
-rw-r--r--Remote.hs2
-rw-r--r--Remote/List.hs4
-rw-r--r--debian/changelog3
-rw-r--r--doc/git-annex.mdwn8
-rw-r--r--doc/preferred_content.mdwn10
11 files changed, 251 insertions, 123 deletions
diff --git a/Annex/Drop.hs b/Annex/Drop.hs
new file mode 100644
index 000000000..df64895be
--- /dev/null
+++ b/Annex/Drop.hs
@@ -0,0 +1,118 @@
+{- dropping of unwanted content
+ -
+ - Copyright 2012-2014 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.Drop where
+
+import Common.Annex
+import Logs.Location
+import Logs.Trust
+import Types.Remote (uuid)
+import qualified Remote
+import qualified Command.Drop
+import Command
+import Annex.Wanted
+import Annex.Exception
+import Config
+import Annex.Content.Direct
+
+import qualified Data.Set as S
+import System.Log.Logger (debugM)
+
+type Reason = String
+
+{- Drop a key from local and/or remote when allowed by the preferred content
+ - and numcopies settings.
+ -
+ - The Remote list can include other remotes that do not have the content.
+ -
+ - A remote can be specified that is known to have the key. This can be
+ - used an an optimisation when eg, a key has just been uploaded to a
+ - remote.
+ -}
+handleDrops :: Reason -> [Remote] -> Bool -> Key -> AssociatedFile -> Maybe Remote -> Annex ()
+handleDrops _ _ _ _ Nothing _ = noop
+handleDrops reason rs fromhere key f knownpresentremote = do
+ locs <- loggedLocations key
+ handleDropsFrom locs rs reason fromhere key f knownpresentremote
+
+{- The UUIDs are ones where the content is believed to be present.
+ - The Remote list can include other remotes that do not have the content;
+ - only ones that match the UUIDs will be dropped from.
+ - If allowed to drop fromhere, that drop will be tried first.
+ -
+ - In direct mode, all associated files are checked, and only if all
+ - of them are unwanted are they dropped.
+ -}
+handleDropsFrom :: [UUID] -> [Remote] -> Reason -> Bool -> Key -> AssociatedFile -> Maybe Remote -> Annex ()
+handleDropsFrom _ _ _ _ _ Nothing _ = noop
+handleDropsFrom locs rs reason fromhere key (Just afile) knownpresentremote = do
+ fs <- ifM isDirect
+ ( do
+ l <- associatedFilesRelative key
+ if null l
+ then return [afile]
+ else return l
+ , return [afile]
+ )
+ n <- getcopies fs
+ if fromhere && checkcopies n Nothing
+ then go fs rs =<< dropl fs n
+ else go fs rs n
+ where
+ getcopies fs = do
+ (untrusted, have) <- trustPartition UnTrusted locs
+ numcopies <- maximum <$> mapM (getNumCopies <=< numCopies) fs
+ return (length have, numcopies, S.fromList untrusted)
+
+ {- Check that we have enough copies still to drop the content.
+ - When the remote being dropped from is untrusted, it was not
+ - counted as a copy, so having only numcopies suffices. Otherwise,
+ - we need more than numcopies to safely drop. -}
+ checkcopies (have, numcopies, _untrusted) Nothing = have > numcopies
+ checkcopies (have, numcopies, untrusted) (Just u)
+ | S.member u untrusted = have >= numcopies
+ | otherwise = have > numcopies
+
+ decrcopies (have, numcopies, untrusted) Nothing =
+ (have - 1, numcopies, untrusted)
+ decrcopies v@(_have, _numcopies, untrusted) (Just u)
+ | S.member u untrusted = v
+ | otherwise = decrcopies v Nothing
+
+ go _ [] _ = noop
+ go fs (r:rest) n
+ | uuid r `S.notMember` slocs = go fs rest n
+ | checkcopies n (Just $ Remote.uuid r) =
+ dropr fs r n >>= go fs rest
+ | otherwise = noop
+
+ checkdrop fs n@(have, numcopies, _untrusted) u a =
+ ifM (allM (wantDrop True u . Just) fs)
+ ( ifM (safely $ doCommand $ a (Just numcopies))
+ ( do
+ liftIO $ debugM "drop" $ unwords
+ [ "dropped"
+ , afile
+ , "(from " ++ maybe "here" show u ++ ")"
+ , "(copies now " ++ show (have - 1) ++ ")"
+ , ": " ++ reason
+ ]
+ return $ decrcopies n u
+ , return n
+ )
+ , return n
+ )
+
+ dropl fs n = checkdrop fs n Nothing $ \numcopies ->
+ Command.Drop.startLocal (Just afile) numcopies key knownpresentremote
+
+ dropr fs r n = checkdrop fs n (Just $ Remote.uuid r) $ \numcopies ->
+ Command.Drop.startRemote (Just afile) numcopies key r
+
+ safely a = either (const False) id <$> tryAnnex a
+
+ slocs = S.fromList locs
diff --git a/Assistant/DaemonStatus.hs b/Assistant/DaemonStatus.hs
index ef1e06594..e38463ff6 100644
--- a/Assistant/DaemonStatus.hs
+++ b/Assistant/DaemonStatus.hs
@@ -59,7 +59,7 @@ calcSyncRemotes = do
return $ \dstatus -> dstatus
{ syncRemotes = syncable
- , syncGitRemotes = filter Remote.syncableRemote syncable
+ , syncGitRemotes = filter Remote.gitSyncableRemote syncable
, syncDataRemotes = syncdata
, syncingToCloudRemote = any iscloud syncdata
}
diff --git a/Assistant/Drop.hs b/Assistant/Drop.hs
index d9d812397..03ab5ab2c 100644
--- a/Assistant/Drop.hs
+++ b/Assistant/Drop.hs
@@ -5,24 +5,15 @@
- Licensed under the GNU GPL version 3 or higher.
-}
-module Assistant.Drop where
+module Assistant.Drop (
+ handleDrops,
+ handleDropsFrom,
+) where
import Assistant.Common
import Assistant.DaemonStatus
+import Annex.Drop (handleDropsFrom, Reason)
import Logs.Location
-import Logs.Trust
-import Types.Remote (uuid)
-import qualified Remote
-import qualified Command.Drop
-import Command
-import Annex.Wanted
-import Annex.Exception
-import Config
-import Annex.Content.Direct
-
-import qualified Data.Set as S
-
-type Reason = String
{- Drop from local and/or remote when allowed by the preferred content and
- numcopies settings. -}
@@ -31,82 +22,4 @@ handleDrops _ _ _ Nothing _ = noop
handleDrops reason fromhere key f knownpresentremote = do
syncrs <- syncDataRemotes <$> getDaemonStatus
locs <- liftAnnex $ loggedLocations key
- handleDropsFrom locs syncrs reason fromhere key f knownpresentremote
-
-{- The UUIDs are ones where the content is believed to be present.
- - The Remote list can include other remotes that do not have the content;
- - only ones that match the UUIDs will be dropped from.
- - If allowed to drop fromhere, that drop will be tried first.
- -
- - In direct mode, all associated files are checked, and only if all
- - of them are unwanted are they dropped.
- -}
-handleDropsFrom :: [UUID] -> [Remote] -> Reason -> Bool -> Key -> AssociatedFile -> Maybe Remote -> Assistant ()
-handleDropsFrom _ _ _ _ _ Nothing _ = noop
-handleDropsFrom locs rs reason fromhere key (Just afile) knownpresentremote = do
- fs <- liftAnnex $ ifM isDirect
- ( do
- l <- associatedFilesRelative key
- if null l
- then return [afile]
- else return l
- , return [afile]
- )
- n <- getcopies fs
- if fromhere && checkcopies n Nothing
- then go fs rs =<< dropl fs n
- else go fs rs n
- where
- getcopies fs = liftAnnex $ do
- (untrusted, have) <- trustPartition UnTrusted locs
- numcopies <- maximum <$> mapM (getNumCopies <=< numCopies) fs
- return (length have, numcopies, S.fromList untrusted)
-
- {- Check that we have enough copies still to drop the content.
- - When the remote being dropped from is untrusted, it was not
- - counted as a copy, so having only numcopies suffices. Otherwise,
- - we need more than numcopies to safely drop. -}
- checkcopies (have, numcopies, _untrusted) Nothing = have > numcopies
- checkcopies (have, numcopies, untrusted) (Just u)
- | S.member u untrusted = have >= numcopies
- | otherwise = have > numcopies
-
- decrcopies (have, numcopies, untrusted) Nothing =
- (have - 1, numcopies, untrusted)
- decrcopies v@(_have, _numcopies, untrusted) (Just u)
- | S.member u untrusted = v
- | otherwise = decrcopies v Nothing
-
- go _ [] _ = noop
- go fs (r:rest) n
- | uuid r `S.notMember` slocs = go fs rest n
- | checkcopies n (Just $ Remote.uuid r) =
- dropr fs r n >>= go fs rest
- | otherwise = noop
-
- checkdrop fs n@(have, numcopies, _untrusted) u a =
- ifM (liftAnnex $ allM (wantDrop True u . Just) fs)
- ( ifM (liftAnnex $ safely $ doCommand $ a (Just numcopies))
- ( do
- debug
- [ "dropped"
- , afile
- , "(from " ++ maybe "here" show u ++ ")"
- , "(copies now " ++ show (have - 1) ++ ")"
- , ": " ++ reason
- ]
- return $ decrcopies n u
- , return n
- )
- , return n
- )
-
- dropl fs n = checkdrop fs n Nothing $ \numcopies ->
- Command.Drop.startLocal (Just afile) numcopies key knownpresentremote
-
- dropr fs r n = checkdrop fs n (Just $ Remote.uuid r) $ \numcopies ->
- Command.Drop.startRemote (Just afile) numcopies key r
-
- safely a = either (const False) id <$> tryAnnex a
-
- slocs = S.fromList locs
+ liftAnnex $ handleDropsFrom locs syncrs reason fromhere key f knownpresentremote
diff --git a/Assistant/Threads/TransferScanner.hs b/Assistant/Threads/TransferScanner.hs
index ba302d6bb..b00195789 100644
--- a/Assistant/Threads/TransferScanner.hs
+++ b/Assistant/Threads/TransferScanner.hs
@@ -156,7 +156,7 @@ expensiveScan urlrenderer rs = unless onlyweb $ batch <~> do
syncrs <- syncDataRemotes <$> getDaemonStatus
locs <- liftAnnex $ loggedLocations key
present <- liftAnnex $ inAnnex key
- handleDropsFrom locs syncrs
+ liftAnnex $ handleDropsFrom locs syncrs
"expensive scan found too many copies of object"
present key (Just f) Nothing
liftAnnex $ do
diff --git a/Command/Get.hs b/Command/Get.hs
index 9adf79393..52fbd25f9 100644
--- a/Command/Get.hs
+++ b/Command/Get.hs
@@ -59,7 +59,11 @@ perform key afile = stopUnless (getViaTmp key $ getKeyFile key afile) $
{- Try to find a copy of the file in one of the remotes,
- and copy it to here. -}
getKeyFile :: Key -> AssociatedFile -> FilePath -> Annex Bool
-getKeyFile key afile dest = dispatch =<< Remote.keyPossibilities key
+getKeyFile key afile dest = getKeyFile' key afile dest
+ =<< Remote.keyPossibilities key
+
+getKeyFile' :: Key -> AssociatedFile -> FilePath -> [Remote] -> Annex Bool
+getKeyFile' key afile dest = dispatch
where
dispatch [] = do
showNote "not available"
diff --git a/Command/Sync.hs b/Command/Sync.hs
index 38a6a5c6a..429a219bb 100644
--- a/Command/Sync.hs
+++ b/Command/Sync.hs
@@ -1,7 +1,7 @@
{- git-annex command
-
- Copyright 2011 Joachim Breitner <mail@joachim-breitner.de>
- - Copyright 2011,2012 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -10,10 +10,11 @@ module Command.Sync where
import Common.Annex
import Command
-import qualified Remote
import qualified Annex
import qualified Annex.Branch
import qualified Annex.Queue
+import qualified Remote
+import qualified Types.Remote as Remote
import Annex.Direct
import Annex.CatFile
import Annex.Link
@@ -26,19 +27,34 @@ import qualified Git
import Git.Types (BlobType(..))
import qualified Types.Remote
import qualified Remote.Git
+import qualified Option
import Types.Key
import Config
import Annex.ReplaceFile
import Git.FileMode
+import Annex.Wanted
+import Annex.Content
+import Command.Get (getKeyFile')
+import Logs.Transfer
+import Logs.Presence
+import Logs.Location
+import Annex.Drop
import qualified Data.Set as S
import Data.Hash.MD5
import Control.Concurrent.MVar
def :: [Command]
-def = [command "sync" (paramOptional (paramRepeating paramRemote))
+def = [withOptions syncOptions $
+ command "sync" (paramOptional (paramRepeating paramRemote))
[seek] SectionCommon "synchronize local repository with remotes"]
+syncOptions :: [Option]
+syncOptions = [ contentOption ]
+
+contentOption :: Option
+contentOption = Option.flag [] "content" "also transfer file contents"
+
-- syncing involves several operations, any of which can independently fail
seek :: CommandSeek
seek rs = do
@@ -60,17 +76,26 @@ seek rs = do
let withbranch a = a =<< getbranch
remotes <- syncRemotes rs
+ let gitremotes = filter Remote.gitSyncableRemote remotes
+
+ synccontent <- ifM (Annex.getFlag $ Option.name contentOption)
+ ( withFilesInGit (whenAnnexed $ syncContent remotes) []
+ , return []
+ )
+
return $ concat
[ [ commit ]
, [ withbranch mergeLocal ]
- , [ withbranch (pullRemote remote) | remote <- remotes ]
+ , map (withbranch . pullRemote) gitremotes
, [ mergeAnnex ]
+ , synccontent
, [ withbranch pushLocal ]
- , [ withbranch (pushRemote remote) | remote <- remotes ]
+ , map (withbranch . pushRemote) gitremotes
]
{- Merging may delete the current directory, so go to the top
- - of the repo. -}
+ - of the repo. This also means that sync always acts on all files in the
+ - repository, not just on a subdirectory. -}
prepMerge :: Annex ()
prepMerge = liftIO . setCurrentDirectory =<< fromRepo Git.repoPath
@@ -83,21 +108,16 @@ remoteBranch remote = Git.Ref.underBase $ "refs/remotes/" ++ Remote.name remote
syncRemotes :: [String] -> Annex [Remote]
syncRemotes rs = ifM (Annex.getState Annex.fast) ( nub <$> pickfast , wanted )
where
- pickfast = (++) <$> listed <*> (good =<< fastest <$> available)
+ pickfast = (++) <$> listed <*> (filterM good =<< fastest <$> available)
wanted
- | null rs = good =<< concat . Remote.byCost <$> available
+ | null rs = filterM good =<< concat . Remote.byCost <$> available
| otherwise = listed
- listed = do
- l <- catMaybes <$> mapM (Remote.byName . Just) rs
- let s = filter (not . Remote.syncableRemote) l
- unless (null s) $
- error $ "cannot sync special remotes: " ++
- unwords (map Types.Remote.name s)
- return l
- available = filter Remote.syncableRemote
- . filter (remoteAnnexSync . Types.Remote.gitconfig)
+ listed = catMaybes <$> mapM (Remote.byName . Just) rs
+ available = filter (remoteAnnexSync . Types.Remote.gitconfig)
<$> Remote.remoteList
- good = filterM $ Remote.Git.repoAvail . Types.Remote.repo
+ good r
+ | Remote.gitSyncableRemote r = Remote.Git.repoAvail $ Types.Remote.repo r
+ | otherwise = return True
fastest = fromMaybe [] . headMaybe . Remote.byCost
commit :: CommandStart
@@ -152,6 +172,9 @@ mergeLocal (Just branch) = go =<< needmerge
pushLocal :: Maybe Git.Ref -> CommandStart
pushLocal Nothing = stop
pushLocal (Just branch) = do
+ -- In case syncing content made changes to the git-annex branch,
+ -- commit it.
+ Annex.Branch.commit "update"
-- Update the sync branch to match the new state of the branch
inRepo $ updateBranch $ syncBranch branch
-- In direct mode, we're operating on some special direct mode
@@ -464,3 +487,64 @@ newer remote b = do
( inRepo $ Git.Branch.changed r b
, return True
)
+
+{- If it's preferred content, and we don't have it, get it from one of the
+ - listed remotes (preferring the cheaper earlier ones).
+ -
+ - Send it to each remote that doesn't have it, and for which it's
+ - preferred content.
+ -
+ - Drop it locally if it's not preferred content (honoring numcopies).
+ -
+ - Drop it from each remote that has it, where it's not preferred content
+ - (honoring numcopies).
+ -}
+syncContent :: [Remote] -> FilePath -> (Key, Backend) -> CommandStart
+syncContent rs f (k, _) = do
+ locs <- loggedLocations k
+ let (have, lack) = partition (\r -> Remote.uuid r `elem` locs) rs
+
+ results <- mapM run =<< concat <$> sequence
+ [ handleget have
+ , handleput lack
+ ]
+ handleDropsFrom locs rs "unwanted" True k (Just f) Nothing
+ if null results
+ then stop
+ else do
+ showStart "sync" f
+ next $ next $ return $ all id results
+ where
+ run a = do
+ r <- a
+ showEndResult r
+ return r
+
+ wantget have = allM id
+ [ pure (not $ null have)
+ , not <$> inAnnex k
+ , wantGet True (Just f)
+ ]
+ handleget have = ifM (wantget have)
+ ( return [ get have ]
+ , return []
+ )
+ get have = do
+ showStart "get" f
+ getViaTmp k $ \dest -> getKeyFile' k (Just f) dest have
+
+ wantput r
+ | Remote.readonly r || remoteAnnexReadOnly (Types.Remote.gitconfig r) = return False
+ | otherwise = wantSend True (Just f) (Remote.uuid r)
+ handleput lack = ifM (inAnnex k)
+ ( map put <$> (filterM wantput lack)
+ , return []
+ )
+ put dest = do
+ showStart "copy" f
+ showAction $ "to " ++ Remote.name dest
+ ok <- upload (Remote.uuid dest) k (Just f) noRetry $
+ Remote.storeKey dest k (Just f)
+ when ok $
+ Remote.logStatus dest k InfoPresent
+ return ok
diff --git a/Remote.hs b/Remote.hs
index e355b0975..3c838a623 100644
--- a/Remote.hs
+++ b/Remote.hs
@@ -20,7 +20,7 @@ module Remote (
remoteTypes,
remoteList,
- syncableRemote,
+ gitSyncableRemote,
remoteMap,
uuidDescriptions,
byName,
diff --git a/Remote/List.hs b/Remote/List.hs
index 31a9209b1..e3afc939c 100644
--- a/Remote/List.hs
+++ b/Remote/List.hs
@@ -111,6 +111,6 @@ updateRemote remote = do
| otherwise = return r
{- Checks if a remote is syncable using git. -}
-syncableRemote :: Remote -> Bool
-syncableRemote r = remotetype r `elem`
+gitSyncableRemote :: Remote -> Bool
+gitSyncableRemote r = remotetype r `elem`
[ Remote.Git.remote, Remote.GCrypt.remote ]
diff --git a/debian/changelog b/debian/changelog
index 7dd801906..d41fe5e6d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,5 +1,8 @@
git-annex (5.20140118) UNRELEASED; urgency=medium
+ * sync --content: New option that makes the content of annexed files be
+ transferred. Similar to the assistant, this honors any configured
+ preferred content expressions.
* Remove --json option from commands not supporting it.
* status: Support --json.
* list: Fix specifying of files to list.
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index a3c52458f..e8058720c 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -156,8 +156,12 @@ subdirectories).
are pushed to the remote, so they can be merged into its working tree
by running "git annex sync" on the remote.
- Note that sync does not transfer any annexed file contents from or
- to the remote repositories; it only syncs the git repositories.
+ With the `--content` option, the contents of annexed files in the work
+ tree will also be uploaded and downloaded from remotes. By default,
+ this tries to get each annexed file that the local repository does not
+ yet have, and then copies each file to every remote that it is syncing with.
+ This behavior can be overridden by configuring the preferred content of
+ a repository. See see PREFERRED CONTENT below.
* `merge`
diff --git a/doc/preferred_content.mdwn b/doc/preferred_content.mdwn
index 2e12dce49..9c698c8ba 100644
--- a/doc/preferred_content.mdwn
+++ b/doc/preferred_content.mdwn
@@ -3,13 +3,15 @@ data always exist, and leaves it up to you to use commands like `git annex
get` and `git annex drop` to move the content to the repositories you want
to contain it. But sometimes, it can be good to have more fine-grained
control over which repositories prefer to have which content. Configuring
-this allows `git annex get --auto`, `git annex drop --auto`, etc to do
-smarter things.
+this allows the git-annex assistant as well as
+`git annex get --auto`, `git annex drop --auto`, `git annex sync --content`,
+etc to do smarter things.
Preferred content settings can be edited using `git
annex vicfg`, or viewed and set at the command line with `git annex wanted`.
-Each repository can have its own settings, and other repositories may also
-try to honor those settings. So there's no local `.git/config` setting it.
+Each repository can have its own settings, and other repositories will
+try to honor those settings when interacting with it.
+So there's no local `.git/config` for preferred content settings.
The idea is that you write an expression that files are matched against.
If a file matches, it's preferred to have its content stored in the