summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-03-27 15:56:43 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-03-27 16:04:25 -0400
commitb40f253d6e126d699e9f298bf670fc5e875bfd86 (patch)
tree546a11e81490fcc6b098085ceebd315cf3f6a305
parent2821effce9ae95a2ef12a083ce0806fe058ac987 (diff)
start of generalizing remotes
Goal is to support multiple different types of remotes, some of which are not git repositories. To that end, added a Remote class, and moved git remote specific code into Remote.GitRemote. Remotes.hs is still present as some code has not been converted to use the new Remote class yet.
-rw-r--r--Backend/File.hs42
-rw-r--r--Remote.hs66
-rw-r--r--Remote/GitRemote.hs263
-rw-r--r--RemoteClass.hs46
-rw-r--r--Remotes.hs9
-rw-r--r--UUID.hs18
6 files changed, 405 insertions, 39 deletions
diff --git a/Backend/File.hs b/Backend/File.hs
index fb8a05255..743d8d627 100644
--- a/Backend/File.hs
+++ b/Backend/File.hs
@@ -14,14 +14,14 @@
module Backend.File (backend, checkKey) where
-import Control.Monad.State
-import System.Directory
+import Control.Monad.State (liftIO)
import Data.List
+import Data.String.Utils
import BackendClass
import LocationLog
-import Locations
-import qualified Remotes
+import qualified Remote
+import qualified RemoteClass
import qualified GitRepo as Git
import Content
import qualified Annex
@@ -51,10 +51,10 @@ dummyStore :: FilePath -> Key -> Annex Bool
dummyStore _ _ = return True
{- Try to find a copy of the file in one of the remotes,
- - and copy it over to this one. -}
+ - and copy it to here. -}
copyKeyFile :: Key -> FilePath -> Annex Bool
copyKeyFile key file = do
- (remotes, _) <- Remotes.keyPossibilities key
+ (remotes, _) <- Remote.keyPossibilities key
if null remotes
then do
showNote "not available"
@@ -72,18 +72,18 @@ copyKeyFile key file = do
then docopy r (trycopy full rs)
else trycopy full rs
-- This check is to avoid an ugly message if a remote is a
- -- drive that is not mounted. Avoid checking inAnnex for ssh
- -- remotes because that is unnecessarily slow, and the
- -- locationlog should be trusted. (If the ssh remote is down
- -- or really lacks the file, it's ok to show an ugly message
- -- before going on to the next remote.)
+ -- drive that is not mounted.
probablyPresent r =
- if not $ Git.repoIsUrl r
- then liftIO $ doesFileExist $ gitAnnexLocation r key
+ if RemoteClass.hasKeyCheap r
+ then do
+ res <- RemoteClass.hasKey r key
+ case res of
+ Right b -> return b
+ Left _ -> return False
else return True
docopy r continue = do
- showNote $ "copying from " ++ Git.repoDescribe r ++ "..."
- copied <- Remotes.copyFromRemote r key file
+ showNote $ "copying from " ++ RemoteClass.name r ++ "..."
+ copied <- RemoteClass.retrieveKeyFile r key file
if copied
then return True
else continue
@@ -97,9 +97,9 @@ checkRemoveKey key numcopiesM = do
if force || numcopiesM == Just 0
then return True
else do
- (remotes, trusteduuids) <- Remotes.keyPossibilities key
+ (remotes, trusteduuids) <- Remote.keyPossibilities key
untrusteduuids <- trustGet UnTrusted
- tocheck <- reposWithoutUUID remotes (trusteduuids++untrusteduuids)
+ let tocheck = Remote.remotesWithoutUUID remotes (trusteduuids++untrusteduuids)
numcopies <- getNumCopies numcopiesM
findcopies numcopies trusteduuids tocheck []
where
@@ -109,9 +109,9 @@ checkRemoveKey key numcopiesM = do
findcopies need have (r:rs) bad
| length have >= need = return True
| otherwise = do
- u <- getUUID r
+ let u = RemoteClass.uuid r
let dup = u `elem` have
- haskey <- Remotes.inAnnex r key
+ haskey <- (RemoteClass.hasKey r) key
case (dup, haskey) of
(False, Right True) -> findcopies need (u:have) rs bad
(False, Left _) -> findcopies need have rs (r:bad)
@@ -147,11 +147,11 @@ showLocations key exclude = do
message [] us = "Also these untrusted repositories may contain the file:\n" ++ us
message rs us = message rs [] ++ message [] us
-showTriedRemotes :: [Git.Repo] -> Annex ()
+showTriedRemotes :: [RemoteClass.Remote] -> Annex ()
showTriedRemotes [] = return ()
showTriedRemotes remotes =
showLongNote $ "Unable to access these remotes: " ++
- Remotes.list remotes
+ (join ", " $ map RemoteClass.name remotes)
getNumCopies :: Maybe Int -> Annex Int
getNumCopies (Just n) = return n
diff --git a/Remote.hs b/Remote.hs
new file mode 100644
index 000000000..9eff5556c
--- /dev/null
+++ b/Remote.hs
@@ -0,0 +1,66 @@
+{- git-annex remotes
+ -
+ - Copyright 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Remote (
+ generate,
+ keyPossibilities,
+ remotesWithUUID,
+ remotesWithoutUUID
+) where
+
+import Control.Monad.State (liftIO)
+import Data.List
+
+import RemoteClass
+import qualified Remote.GitRemote
+import Types
+import UUID
+import qualified Annex
+import Trust
+import LocationLog
+
+{- add generators for new Remotes here -}
+generators :: [Annex [Remote]]
+generators = [Remote.GitRemote.generate]
+
+{- generates a list of all available Remotes -}
+generate :: Annex [Remote]
+generate = do
+ lists <- sequence generators
+ return $ concat lists
+
+{- Filters a list of remotes to ones that have the listed uuids. -}
+remotesWithUUID :: [Remote] -> [UUID] -> [Remote]
+remotesWithUUID rs us = filter (\r -> uuid r `elem` us) rs
+
+{- Filters a list of remotes to ones that do not have the listed uuids. -}
+remotesWithoutUUID :: [Remote] -> [UUID] -> [Remote]
+remotesWithoutUUID rs us = filter (\r -> uuid r `notElem` us) rs
+
+{- Cost ordered lists of remotes that the LocationLog indicate may have a key.
+ -
+ - Also returns a list of UUIDs that are trusted to have the key
+ - (some may not have configured remotes).
+ -}
+keyPossibilities :: Key -> Annex ([Remote], [UUID])
+keyPossibilities key = do
+ g <- Annex.gitRepo
+ u <- getUUID g
+ trusted <- trustGet Trusted
+
+ -- get uuids of all remotes that are recorded to have the key
+ uuids <- liftIO $ keyLocations g key
+ let validuuids = filter (/= u) uuids
+
+ -- note that validuuids is assumed to not have dups
+ let validtrusteduuids = intersect validuuids trusted
+
+ -- remotes that match uuids that have the key
+ allremotes <- generate
+ let validremotes = remotesWithUUID allremotes validuuids
+
+ return (sort validremotes, validtrusteduuids)
diff --git a/Remote/GitRemote.hs b/Remote/GitRemote.hs
new file mode 100644
index 000000000..ccc5f7b42
--- /dev/null
+++ b/Remote/GitRemote.hs
@@ -0,0 +1,263 @@
+{- Standard git remotes.
+ -
+ - Copyright 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Remote.GitRemote (generate) where
+
+import Control.Exception.Extensible
+import Control.Monad.State (liftIO)
+import qualified Data.Map as Map
+import Data.String.Utils
+import System.Cmd.Utils
+import Control.Monad (unless, filterM)
+
+import RemoteClass
+import Types
+import qualified GitRepo as Git
+import qualified Annex
+import Locations
+import UUID
+import Utility
+import qualified Content
+import Messages
+import CopyFile
+import RsyncFile
+import Ssh
+
+generate :: Annex [Remote]
+generate = do
+ readConfigs
+ g <- Annex.gitRepo
+ rs <- filterM repoNotIgnored (Git.remotes g)
+ mapM genRemote rs
+
+genRemote :: Git.Repo -> Annex Remote
+genRemote r = do
+ u <- getUUID r
+ c <- repoCost r
+ return Remote {
+ uuid = u,
+ cost = c,
+ name = Git.repoDescribe r,
+ storeKey = copyToRemote r,
+ retrieveKeyFile = copyFromRemote r,
+ removeKey = error "TODO Remote.GitRemote.removeKey",
+ hasKey = inAnnex r,
+ hasKeyCheap = not (Git.repoIsUrl r)
+ }
+
+{- Reads the configs of all remotes.
+ -
+ - As reading the config of remotes can be expensive, this
+ - function will only read configs once per git-annex run. It's
+ - assumed to be cheap to read the config of non-URL remotes,
+ - so this is done each time git-annex is run. Conversely,
+ - the config of an URL remote is only read when there is no
+ - cached UUID value.
+ - -}
+readConfigs :: Annex ()
+readConfigs = do
+ remotesread <- Annex.getState Annex.remotesread
+ unless remotesread $ do
+ g <- Annex.gitRepo
+ allremotes <- filterM repoNotIgnored $ Git.remotes g
+ let cheap = filter (not . Git.repoIsUrl) allremotes
+ let expensive = filter Git.repoIsUrl allremotes
+ doexpensive <- filterM cachedUUID expensive
+ unless (null doexpensive) $
+ showNote $ "getting UUID for " ++
+ list doexpensive ++ "..."
+ let todo = cheap ++ doexpensive
+ unless (null todo) $ do
+ mapM_ tryGitConfigRead todo
+ Annex.changeState $ \s -> s { Annex.remotesread = True }
+ where
+ cachedUUID r = do
+ u <- getUUID r
+ return $ null u
+
+{- The git configs for the git repo's remotes is not read on startup
+ - because reading it may be expensive. This function tries to read the
+ - config for a specified remote, and updates state. If successful, it
+ - returns the updated git repo. -}
+tryGitConfigRead :: Git.Repo -> Annex (Either Git.Repo Git.Repo)
+tryGitConfigRead r
+ | not $ Map.null $ Git.configMap r = return $ Right r -- already read
+ | Git.repoIsSsh r = store $ onRemote r (pipedconfig, r) "configlist" []
+ | Git.repoIsUrl r = return $ Left r
+ | otherwise = store $ safely $ Git.configRead r
+ where
+ -- Reading config can fail due to IO error or
+ -- for other reasons; catch all possible exceptions.
+ safely a = do
+ result <- liftIO (try (a)::IO (Either SomeException Git.Repo))
+ case result of
+ Left _ -> return r
+ Right r' -> return r'
+ pipedconfig cmd params = safely $
+ pOpen ReadFromPipe cmd (toCommand params) $
+ Git.hConfigRead r
+ store a = do
+ r' <- a
+ g <- Annex.gitRepo
+ let l = Git.remotes g
+ let g' = Git.remotesAdd g $ exchange l r'
+ Annex.changeState $ \s -> s { Annex.repo = g' }
+ return $ Right r'
+ exchange [] _ = []
+ exchange (old:ls) new =
+ if Git.repoRemoteName old == Git.repoRemoteName new
+ then new : exchange ls new
+ else old : exchange ls new
+
+{- Calculates cost for a repo.
+ -
+ - The default cost is 100 for local repositories, and 200 for remote
+ - repositories; it can also be configured by remote.<name>.annex-cost
+ -}
+repoCost :: Git.Repo -> Annex Int
+repoCost r = do
+ c <- Annex.repoConfig r "cost" ""
+ if not $ null c
+ then return $ read c
+ else if Git.repoIsUrl r
+ then return 200
+ else return 100
+
+{- Checks if a repo should be ignored, based either on annex-ignore
+ - setting, or on command-line options. Allows command-line to override
+ - annex-ignore. -}
+repoNotIgnored :: Git.Repo -> Annex Bool
+repoNotIgnored r = do
+ ignored <- Annex.repoConfig r "ignore" "false"
+ to <- match Annex.toremote
+ from <- match Annex.fromremote
+ if to || from
+ then return True
+ else return $ not $ Git.configTrue ignored
+ where
+ match a = do
+ n <- Annex.getState a
+ return $ n == Git.repoRemoteName r
+
+{- Checks if a given remote has the content for a key inAnnex.
+ - If the remote cannot be accessed, returns a Left error.
+ -}
+inAnnex :: Git.Repo -> Key -> Annex (Either IOException Bool)
+inAnnex r key = if Git.repoIsUrl r
+ then checkremote
+ else liftIO (try checklocal ::IO (Either IOException Bool))
+ where
+ checklocal = do
+ -- run a local check inexpensively,
+ -- by making an Annex monad using the remote
+ a <- Annex.new r []
+ Annex.eval a (Content.inAnnex key)
+ checkremote = do
+ showNote ("checking " ++ Git.repoDescribe r ++ "...")
+ inannex <- onRemote r (boolSystem, False) "inannex"
+ [Param (show key)]
+ return $ Right inannex
+
+{- Tries to copy a key's content from a remote's annex to a file. -}
+copyFromRemote :: Git.Repo -> Key -> FilePath -> Annex Bool
+copyFromRemote r key file
+ | not $ Git.repoIsUrl r = liftIO $ copyFile (gitAnnexLocation r key) file
+ | Git.repoIsSsh r = rsynchelper r True key file
+ | otherwise = error "copying from non-ssh repo not supported"
+
+{- Tries to copy a key's content to a remote's annex. -}
+copyToRemote :: Git.Repo -> Key -> Annex Bool
+copyToRemote r key
+ | not $ Git.repoIsUrl r = do
+ g <- Annex.gitRepo
+ let keysrc = gitAnnexLocation g key
+ -- run copy from perspective of remote
+ liftIO $ do
+ a <- Annex.new r []
+ Annex.eval a $ do
+ ok <- Content.getViaTmp key $
+ \f -> liftIO $ copyFile keysrc f
+ Annex.queueRun
+ return ok
+ | Git.repoIsSsh r = do
+ g <- Annex.gitRepo
+ let keysrc = gitAnnexLocation g key
+ rsynchelper r False key keysrc
+ | otherwise = error "copying to non-ssh repo not supported"
+
+rsynchelper :: Git.Repo -> Bool -> Key -> FilePath -> Annex (Bool)
+rsynchelper r sending key file = do
+ showProgress -- make way for progress bar
+ p <- rsyncParams r sending key file
+ res <- liftIO $ boolSystem "rsync" p
+ if res
+ then return res
+ else do
+ showLongNote "rsync failed -- run git annex again to resume file transfer"
+ return res
+
+{- Generates rsync parameters that ssh to the remote and asks it
+ - to either receive or send the key's content. -}
+rsyncParams :: Git.Repo -> Bool -> Key -> FilePath -> Annex [CommandParam]
+rsyncParams r sending key file = do
+ Just (shellcmd, shellparams) <- git_annex_shell r
+ (if sending then "sendkey" else "recvkey")
+ [ Param $ show key
+ -- Command is terminated with "--", because
+ -- rsync will tack on its own options afterwards,
+ -- and they need to be ignored.
+ , Param "--"
+ ]
+ -- Convert the ssh command into rsync command line.
+ let eparam = rsyncShell (Param shellcmd:shellparams)
+ o <- Annex.repoConfig r "rsync-options" ""
+ let base = options ++ map Param (words o) ++ eparam
+ if sending
+ then return $ base ++ [dummy, File file]
+ else return $ base ++ [File file, dummy]
+ where
+ -- inplace makes rsync resume partial files
+ options = [Params "-p --progress --inplace"]
+ -- the rsync shell parameter controls where rsync
+ -- goes, so the source/dest parameter can be a dummy value,
+ -- that just enables remote rsync mode.
+ dummy = Param ":"
+
+{- Uses a supplied function to run a git-annex-shell command on a remote.
+ -
+ - Or, if the remote does not support running remote commands, returns
+ - a specified error value. -}
+onRemote
+ :: Git.Repo
+ -> (FilePath -> [CommandParam] -> IO a, a)
+ -> String
+ -> [CommandParam]
+ -> Annex a
+onRemote r (with, errorval) command params = do
+ s <- git_annex_shell r command params
+ case s of
+ Just (c, ps) -> liftIO $ with c ps
+ Nothing -> return errorval
+
+{- Generates parameters to run a git-annex-shell command on a remote. -}
+git_annex_shell :: Git.Repo -> String -> [CommandParam] -> Annex (Maybe (FilePath, [CommandParam]))
+git_annex_shell r command params
+ | not $ Git.repoIsUrl r = return $ Just (shellcmd, shellopts)
+ | Git.repoIsSsh r = do
+ sshparams <- sshToRepo r [Param sshcmd]
+ return $ Just ("ssh", sshparams)
+ | otherwise = return Nothing
+ where
+ dir = Git.workTree r
+ shellcmd = "git-annex-shell"
+ shellopts = (Param command):(File dir):params
+ sshcmd = shellcmd ++ " " ++
+ unwords (map shellEscape $ toCommand shellopts)
+
+{- Human visible list of remotes. -}
+list :: [Git.Repo] -> String
+list remotes = join ", " $ map Git.repoDescribe remotes
diff --git a/RemoteClass.hs b/RemoteClass.hs
new file mode 100644
index 000000000..df2aefb71
--- /dev/null
+++ b/RemoteClass.hs
@@ -0,0 +1,46 @@
+{- git-annex remotes class
+ -
+ - Copyright 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module RemoteClass where
+
+import Control.Exception
+
+import Annex
+import UUID
+import Key
+
+data Remote = Remote {
+ -- each Remote has a unique uuid
+ uuid :: UUID,
+ -- each Remote has a human visible name
+ name :: String,
+ -- Remotes have a use cost; higher is more expensive
+ cost :: Int,
+ -- Transfers a key to the remote.
+ storeKey :: Key -> Annex Bool,
+ -- retrieves a key's contents to a file
+ retrieveKeyFile :: Key -> FilePath -> Annex Bool,
+ -- removes a key's contents
+ removeKey :: Key -> Annex Bool,
+ -- Checks if a key is present in the remote; if the remote
+ -- cannot be accessed returns a Left error.
+ hasKey :: Key -> Annex (Either IOException Bool),
+ -- Some remotes can check hasKey without an expensive network
+ -- operation.
+ hasKeyCheap :: Bool
+}
+
+instance Show Remote where
+ show remote = "Remote { uuid =\"" ++ uuid remote ++ "\" }"
+
+-- two remotes are the same if they have the same uuid
+instance Eq Remote where
+ a == b = uuid a == uuid b
+
+-- order remotes by cost
+instance Ord Remote where
+ compare a b = compare (cost a) (cost b)
diff --git a/Remotes.hs b/Remotes.hs
index 5a65e4fc7..5fc594ee2 100644
--- a/Remotes.hs
+++ b/Remotes.hs
@@ -322,3 +322,12 @@ git_annex_shell r command params
shellopts = (Param command):(File dir):params
sshcmd = shellcmd ++ " " ++
unwords (map shellEscape $ toCommand shellopts)
+
+{- Filters a list of repos to ones that have listed UUIDs. -}
+reposByUUID :: [Git.Repo] -> [UUID] -> Annex [Git.Repo]
+reposByUUID repos uuids = filterM match repos
+ where
+ match r = do
+ u <- getUUID r
+ return $ u `elem` uuids
+
diff --git a/UUID.hs b/UUID.hs
index 239d373f1..42afd7ba8 100644
--- a/UUID.hs
+++ b/UUID.hs
@@ -14,8 +14,6 @@ module UUID (
getUncachedUUID,
prepUUID,
genUUID,
- reposByUUID,
- reposWithoutUUID,
prettyPrintUUIDs,
describeUUID,
uuidLog,
@@ -87,22 +85,6 @@ prepUUID = do
uuid <- liftIO $ genUUID
Annex.setConfig configkey uuid
-{- Filters a list of repos to ones that have listed UUIDs. -}
-reposByUUID :: [Git.Repo] -> [UUID] -> Annex [Git.Repo]
-reposByUUID repos uuids = filterM match repos
- where
- match r = do
- u <- getUUID r
- return $ u `elem` uuids
-
-{- Filters a list of repos to ones that do not have the listed UUIDs. -}
-reposWithoutUUID :: [Git.Repo] -> [UUID] -> Annex [Git.Repo]
-reposWithoutUUID repos uuids = filterM unmatch repos
- where
- unmatch r = do
- u <- getUUID r
- return $ u `notElem` uuids
-
{- Pretty-prints a list of UUIDs -}
prettyPrintUUIDs :: [UUID] -> Annex String
prettyPrintUUIDs uuids = do