diff options
author | Joey Hess <joey@kitenet.net> | 2014-04-06 19:06:03 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2014-04-06 19:10:23 -0400 |
commit | 8c4bfe2f2141bce84ea22120da445c148b6f1168 (patch) | |
tree | 817f5951cec02c1acec3f26c886beea79cf0957c | |
parent | 1eb96cc31a0f0ec0339f6b28a362b057444069af (diff) |
added git-annex remotedaemon
So far, handling connecting to git-annex-shell notifychanges, and
pulling immediately when a change is pushed to a remote.
A little bit buggy (crashes after the first pull), but it already works!
This commit was sponsored by Mark Sheppard.
-rw-r--r-- | CmdLine/GitAnnex.hs | 2 | ||||
-rw-r--r-- | Command/NotifyChanges.hs | 2 | ||||
-rw-r--r-- | Command/RemoteDaemon.hs | 24 | ||||
-rw-r--r-- | Git/Types.hs | 4 | ||||
-rw-r--r-- | RemoteDaemon/Core.hs | 114 | ||||
-rw-r--r-- | RemoteDaemon/Transport.hs | 21 | ||||
-rw-r--r-- | RemoteDaemon/Transport/Ssh.hs | 75 | ||||
-rw-r--r-- | RemoteDaemon/Transport/Ssh/Types.hs (renamed from RemoteDaemon/EndPoint/GitAnnexShell/Types.hs) | 8 | ||||
-rw-r--r-- | RemoteDaemon/Types.hs | 58 | ||||
-rw-r--r-- | doc/design/git-remote-daemon.mdwn | 112 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 4 |
11 files changed, 342 insertions, 82 deletions
diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index 3604681f9..9f6eb5ff0 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -89,6 +89,7 @@ import qualified Command.WebApp #ifdef WITH_XMPP import qualified Command.XMPPGit #endif +import qualified Command.RemoteDaemon #endif import qualified Command.Test #ifdef WITH_TESTSUITE @@ -176,6 +177,7 @@ cmds = concat #ifdef WITH_XMPP , Command.XMPPGit.def #endif + , Command.RemoteDaemon.def #endif , Command.Test.def #ifdef WITH_TESTSUITE diff --git a/Command/NotifyChanges.hs b/Command/NotifyChanges.hs index a1a076718..d0df05551 100644 --- a/Command/NotifyChanges.hs +++ b/Command/NotifyChanges.hs @@ -13,7 +13,7 @@ import Utility.DirWatcher import Utility.DirWatcher.Types import qualified Git import Git.Sha -import RemoteDaemon.EndPoint.GitAnnexShell.Types +import RemoteDaemon.Transport.Ssh.Types import Control.Concurrent import Control.Concurrent.Async diff --git a/Command/RemoteDaemon.hs b/Command/RemoteDaemon.hs new file mode 100644 index 000000000..61c3a7d84 --- /dev/null +++ b/Command/RemoteDaemon.hs @@ -0,0 +1,24 @@ +{- git-annex command + - + - Copyright 2014 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.RemoteDaemon where + +import Common.Annex +import Command +import RemoteDaemon.Core + +def :: [Command] +def = [noCommit $ command "remotedaemon" paramNothing seek SectionPlumbing + "detects when remotes have changed, and fetches from them"] + +seek :: CommandSeek +seek = withNothing start + +start :: CommandStart +start = do + liftIO runForeground + stop diff --git a/Git/Types.hs b/Git/Types.hs index 802922532..950fe4b00 100644 --- a/Git/Types.hs +++ b/Git/Types.hs @@ -27,7 +27,7 @@ data RepoLocation | LocalUnknown FilePath | Url URI | Unknown - deriving (Show, Eq) + deriving (Show, Eq, Ord) data Repo = Repo { location :: RepoLocation @@ -41,7 +41,7 @@ data Repo = Repo , gitEnv :: Maybe [(String, String)] -- global options to pass to git when running git commands , gitGlobalOpts :: [CommandParam] - } deriving (Show, Eq) + } deriving (Show, Eq, Ord) type RemoteName = String diff --git a/RemoteDaemon/Core.hs b/RemoteDaemon/Core.hs new file mode 100644 index 000000000..8960bf8d3 --- /dev/null +++ b/RemoteDaemon/Core.hs @@ -0,0 +1,114 @@ +{- git-remote-daemon core + - + - Copyright 2014 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module RemoteDaemon.Core (runForeground) where + +import qualified Annex +import Common +import Types.GitConfig +import RemoteDaemon.Types +import RemoteDaemon.Transport +import qualified Git +import qualified Git.Types as Git +import qualified Git.CurrentRepo +import Utility.SimpleProtocol + +import Control.Concurrent.Async +import Control.Concurrent.Chan +import Network.URI +import qualified Data.Map as M + +runForeground :: IO () +runForeground = do + ichan <- newChan :: IO (Chan Consumed) + ochan <- newChan :: IO (Chan Emitted) + + void $ async $ controller ichan ochan + + let reader = forever $ do + l <- getLine + case parseMessage l of + Nothing -> error $ "protocol error: " ++ l + Just cmd -> writeChan ichan cmd + let writer = forever $ do + msg <- readChan ochan + putStrLn $ unwords $ formatMessage msg + hFlush stdout + + -- If the reader or writer fails, for example because stdin/stdout + -- gets closed, kill the other one, and throw an exception which + -- will take down the daemon. + void $ concurrently reader writer + +type RemoteMap = M.Map Git.Repo (IO (), Chan Consumed) + +-- Runs the transports, dispatching messages to them, and handling +-- the main control messages. +controller :: Chan Consumed -> Chan Emitted -> IO () +controller ichan ochan = do + m <- getRemoteMap ochan + startrunning m + go False m + where + go paused m = do + cmd <- readChan ichan + case cmd of + RELOAD -> do + m' <- getRemoteMap ochan + let common = M.intersection m m' + let new = M.difference m' m + let old = M.difference m m' + stoprunning old + unless paused $ + startrunning new + go paused (M.union common new) + PAUSE -> do + stoprunning m + go True m + RESUME -> do + when paused $ + startrunning m + go False m + STOP -> exitSuccess + -- All remaining messages are sent to + -- all Transports. + msg -> do + unless paused $ + forM_ chans (`writeChan` msg) + go paused m + where + chans = map snd (M.elems m) + + startrunning m = forM_ (M.elems m) startrunning' + startrunning' (transport, _) = void $ async transport + + -- Ask the transport nicely to stop. + stoprunning m = forM_ (M.elems m) stoprunning' + stoprunning' (_, c) = writeChan c STOP + +getRemoteMap :: Chan Emitted -> IO RemoteMap +getRemoteMap ochan = do + annexstate <- Annex.new =<< Git.CurrentRepo.get + genRemoteMap annexstate ochan + +-- Generates a map with a transport for each supported remote in the git repo, +-- except those that have annex.sync = false +genRemoteMap :: Annex.AnnexState -> Chan Emitted -> IO RemoteMap +genRemoteMap annexstate ochan = M.fromList . catMaybes <$> mapM gen rs + where + rs = Git.remotes (Annex.repo annexstate) + gen r = case Git.location r of + Git.Url u -> case M.lookup (uriScheme u) remoteTransports of + Just transport + | remoteAnnexSync (extractRemoteGitConfig r (Git.repoDescribe r)) -> do + ichan <- newChan :: IO (Chan Consumed) + return $ Just + ( r + , (transport r (Git.repoDescribe r) annexstate ichan ochan, ichan) + ) + _ -> return Nothing + _ -> return Nothing diff --git a/RemoteDaemon/Transport.hs b/RemoteDaemon/Transport.hs new file mode 100644 index 000000000..1bac7f877 --- /dev/null +++ b/RemoteDaemon/Transport.hs @@ -0,0 +1,21 @@ +{- git-remote-daemon transports + - + - Copyright 2014 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module RemoteDaemon.Transport where + +import RemoteDaemon.Types +import qualified RemoteDaemon.Transport.Ssh + +import qualified Data.Map as M + +-- Corresponds to uriScheme +type TransportScheme = String + +remoteTransports :: M.Map TransportScheme Transport +remoteTransports = M.fromList + [ ("ssh:", RemoteDaemon.Transport.Ssh.transport) + ] diff --git a/RemoteDaemon/Transport/Ssh.hs b/RemoteDaemon/Transport/Ssh.hs new file mode 100644 index 000000000..8f4d007e8 --- /dev/null +++ b/RemoteDaemon/Transport/Ssh.hs @@ -0,0 +1,75 @@ +{- git-remote-daemon, git-annex-shell over ssh transport + - + - Copyright 2014 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module RemoteDaemon.Transport.Ssh (transport) where + +import Common.Annex +import qualified Annex +import RemoteDaemon.Types +import qualified RemoteDaemon.Transport.Ssh.Types as SshRemote +import Remote.Helper.Ssh +import Utility.SimpleProtocol +import qualified Git +import Annex.CatFile +import Git.Command + +import Control.Concurrent.Chan +import Control.Concurrent.Async +import System.Process (std_in, std_out) + +transport :: Transport +transport r remotename annexstate ichan ochan = Annex.eval annexstate $ do + v <- git_annex_shell r "notifychanges" [] [] + case v of + Nothing -> noop + Just (cmd, params) -> liftIO $ go cmd (toCommand params) + where + send msg = writeChan ochan (msg remotename) + go cmd params = do + (Just toh, Just fromh, _, pid) <- createProcess (proc cmd params) + { std_in = CreatePipe + , std_out = CreatePipe + } + + let shutdown = do + hClose toh + hClose fromh + void $ waitForProcess pid + send DISCONNECTED + + let fromshell = forever $ do + l <- hGetLine fromh + case parseMessage l of + Just SshRemote.READY -> send CONNECTED + Just (SshRemote.CHANGED refs) -> + Annex.eval annexstate $ + fetchNew remotename refs + Nothing -> shutdown + + -- The only control message that matters is STOP. + -- + -- Note that a CHANGED control message is not handled; + -- we don't push to the ssh remote. The assistant + -- and git-annex sync both handle pushes, so there's no + -- need to do it here. + let handlecontrol = forever $ do + msg <- readChan ichan + case msg of + STOP -> ioError (userError "done") + _ -> noop + + -- Run both threads until one finishes. + void $ tryIO $ concurrently fromshell handlecontrol + shutdown + +-- Check if any of the shas are actally new, to avoid unnecessary fetching. +fetchNew :: RemoteName -> [Git.Sha] -> Annex () +fetchNew remotename = check + where + check [] = void $ inRepo $ runBool [Param "fetch", Param remotename] + check (r:rs) = maybe (check rs) (const noop) + =<< catObjectDetails r diff --git a/RemoteDaemon/EndPoint/GitAnnexShell/Types.hs b/RemoteDaemon/Transport/Ssh/Types.hs index 996c4237c..d3fd314b4 100644 --- a/RemoteDaemon/EndPoint/GitAnnexShell/Types.hs +++ b/RemoteDaemon/Transport/Ssh/Types.hs @@ -1,4 +1,4 @@ -{- git-remote-daemon, git-annex-shell endpoint, datatypes +{- git-remote-daemon, git-annex-shell notifychanges protocol types - - Copyright 2014 Joey Hess <joey@kitenet.net> - @@ -8,7 +8,7 @@ {-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -module RemoteDaemon.EndPoint.GitAnnexShell.Types ( +module RemoteDaemon.Transport.Ssh.Types ( Notification(..), Proto.serialize, Proto.deserialize, @@ -16,11 +16,11 @@ module RemoteDaemon.EndPoint.GitAnnexShell.Types ( ) where import qualified Utility.SimpleProtocol as Proto -import RemoteDaemon.Types (ShaList) +import RemoteDaemon.Types (RefList) data Notification = READY - | CHANGED ShaList + | CHANGED RefList instance Proto.Sendable Notification where formatMessage READY = ["READY"] diff --git a/RemoteDaemon/Types.hs b/RemoteDaemon/Types.hs index b4b8ba066..5cb0ef758 100644 --- a/RemoteDaemon/Types.hs +++ b/RemoteDaemon/Types.hs @@ -10,74 +10,78 @@ module RemoteDaemon.Types where +import qualified Annex import qualified Git.Types as Git import qualified Utility.SimpleProtocol as Proto +import Control.Concurrent + +-- A Transport for a particular git remote consumes some messages +-- from a Chan, and emits others to another Chan. +type Transport = Git.Repo -> RemoteName -> Annex.AnnexState -> Chan Consumed -> Chan Emitted -> IO () + -- Messages that the daemon emits. data Emitted = CONNECTED RemoteName | DISCONNECTED RemoteName - | CHANGED RemoteName ShaList - | STATUS RemoteName UserMessage - | ERROR RemoteName UserMessage + | SYNCING RemoteName + | DONESYNCING RemoteName Bool -- Messages that the deamon consumes. data Consumed = PAUSE | RESUME - | PUSH RemoteName + | CHANGED RefList | RELOAD + | STOP type RemoteName = String -type UserMessage = String -type ShaList = [Git.Sha] +type RefList = [Git.Ref] instance Proto.Sendable Emitted where formatMessage (CONNECTED remote) = ["CONNECTED", Proto.serialize remote] formatMessage (DISCONNECTED remote) = ["DISCONNECTED", Proto.serialize remote] - formatMessage (CHANGED remote shas) = - ["CHANGED" - , Proto.serialize remote - , Proto.serialize shas - ] - formatMessage (STATUS remote msg) = - ["STATUS" - , Proto.serialize remote - , Proto.serialize msg - ] - formatMessage (ERROR remote msg) = - ["ERROR" - , Proto.serialize remote - , Proto.serialize msg - ] + formatMessage (SYNCING remote) = + ["SYNCING", Proto.serialize remote] + formatMessage (DONESYNCING remote status) = + ["DONESYNCING", Proto.serialize remote, Proto.serialize status] instance Proto.Sendable Consumed where formatMessage PAUSE = ["PAUSE"] formatMessage RESUME = ["RESUME"] - formatMessage (PUSH remote) = ["PUSH", Proto.serialize remote] + formatMessage (CHANGED refs) =["CHANGED", Proto.serialize refs] formatMessage RELOAD = ["RELOAD"] + formatMessage STOP = ["STOP"] instance Proto.Receivable Emitted where parseCommand "CONNECTED" = Proto.parse1 CONNECTED parseCommand "DISCONNECTED" = Proto.parse1 DISCONNECTED - parseCommand "CHANGED" = Proto.parse2 CHANGED - parseCommand "STATUS" = Proto.parse2 STATUS - parseCommand "ERROR" = Proto.parse2 ERROR + parseCommand "SYNCING" = Proto.parse1 SYNCING + parseCommand "DONESYNCING" = Proto.parse2 DONESYNCING parseCommand _ = Proto.parseFail instance Proto.Receivable Consumed where parseCommand "PAUSE" = Proto.parse0 PAUSE parseCommand "RESUME" = Proto.parse0 RESUME - parseCommand "PUSH" = Proto.parse1 PUSH + parseCommand "CHANGED" = Proto.parse1 CHANGED parseCommand "RELOAD" = Proto.parse0 RELOAD + parseCommand "STOP" = Proto.parse0 STOP parseCommand _ = Proto.parseFail instance Proto.Serializable [Char] where serialize = id deserialize = Just -instance Proto.Serializable ShaList where +instance Proto.Serializable RefList where serialize = unwords . map Git.fromRef deserialize = Just . map Git.Ref . words + +instance Proto.Serializable Bool where + serialize False = "0" + serialize True = "1" + + deserialize "0" = Just False + deserialize "1" = Just True + deserialize _ = Nothing diff --git a/doc/design/git-remote-daemon.mdwn b/doc/design/git-remote-daemon.mdwn index 0658d4583..db56bd633 100644 --- a/doc/design/git-remote-daemon.mdwn +++ b/doc/design/git-remote-daemon.mdwn @@ -37,35 +37,24 @@ # design -Let git-remote-daemon be the name. It runs in a repo and -either: +Let git-remote-daemon be the name. Or for git-annex, +`git annex remotedaemon`. -* forks to background and performs configured actions (ie, `git pull`) -* with --foreground, communicates over stdio - with its caller using a simple protocol (exiting when its caller closes its - stdin handle so it will stop when the assistant stops). +It runs in one of two ways: -It is configured entirely by .git/config. +1. Forked to background, using a named pipe for the control protocol. +2. With --foreground, the control protocol goes over stdio. -# encryption & authentication - -For simplicity, the network transports have to do their own end-to-end -encryption. Encryption is not part of this design. - -(XMPP does not do end-to-end encryption, but might be supported -transitionally.) - -Ditto for authentication that we're talking to who we indend to talk to. -Any public key data etc used for authenticion is part of the remote's -configuration (or hidden away in a secure chmodded file, if neccesary). -This design does not concern itself with authenticating the remote node, -it just takes the auth token and uses it. +Either way, behavior is the same: -For example, in telehash, each node has its own keypair, which is used -or authentication and encryption, and is all that's needed to route -messages to that node. +* Get a list of remotes to act on by looking at .git/config +* Automatically notices when a remote has changes to branches + matching remote.$name.fetch, and pulls them down to the appropriate + location. +* When the control protocol informs it about a new ref that's available, + it offers the ref to any interested remotes. -# stdio protocol +# control protocol This is an asynchronous protocol. Ie, either side can send any message at any time, and the other side does not send a reply. @@ -82,25 +71,21 @@ the webapp. * `CONNECTED $remote` - Send when a connection has been made with a remote. + Sent when a connection has been made with a remote. * `DISCONNECTED $remote` - Send when connection with a remote has been lost. - -* `CHANGED $remote $sha ...` - - This indicates that refs in the named git remote have changed, - and indicates the new shas. + Sent when connection with a remote has been lost. -* `STATUS $remote $string` +* `SYNCING $remote` - A user-visible status message about a named remote. + Indicates that a pull or a push with a remote is in progress. + Always followed by DONESYNCING. -* `ERROR $remote $string` +* `DONESYNCING $remote 1|0` - A user-visible error about a named remote. - (Can continue running past this point, for this or other remotes.) + Indicates that syncing with a remote is done, and either succeeded + (1) or failed (0). ## consumed messages @@ -119,21 +104,40 @@ the webapp. Affects all remotes. -* `PUSH $remote` +* `CHANGED ref ...` - Requests that a git push be done with the remote over the network - transport when next possible. May be repeated many times before the push - finally happens. + Indicates that a ref is new or has changed. These can be offered to peers, + and peers that are interested in them can pull the content. * `RELOAD` Indicates that configs have changed. Daemon should reload .git/config and/or restart. -# send-pack and receive-pack +* `STOP` -Done as the assistant does with XMPP currently. Does not involve -communication over the above stdio protocol. + Shut down git-remote-daemon + + (When using stdio, it also should shutdown when it reaches EOF on + stdin.) + +# encryption & authentication + +For simplicity, the network transports have to do their own end-to-end +encryption. Encryption is not part of this design. + +(XMPP does not do end-to-end encryption, but might be supported +transitionally.) + +Ditto for authentication that we're talking to who we indend to talk to. +Any public key data etc used for authenticion is part of the remote's +configuration (or hidden away in a secure chmodded file, if neccesary). +This design does not concern itself with authenticating the remote node, +it just takes the auth token and uses it. + +For example, in telehash, each node has its own keypair, which is used +or authentication and encryption, and is all that's needed to route +messages to that node. # network level protocol @@ -143,17 +147,29 @@ This seems to need to be network-layer dependant. Telehash will need one design, and git-annex-shell on a central ssh server has a very different (and much simpler) design. -## git-annex-shell +## ssh + +`git-annex-shell notifychanges` is run, and speaks a simple protocol +over stdio to inform when refs on the remote have changed. + +No pushing is done for CHANGED, since git handles ssh natively. -Speak a subset of the stdio protocol between git-annex-shell and -git-remote-daemon, over ssh. +TODO: -Only thing that seems to be needed is CHANGED, actually! +* It already detects changes and pulls, but it then dies with a protocol + error. +* Remote system might not be available. Find a smart way to detect it, + ideally w/o generating network traffic. One way might be to check + if the ssh connection caching control socket exists, for example. +* Remote system might be available, and connection get lost. Should + reconnect, but needs to avoid bad behavior (ie, constant reconnect + attempts.) +* Detect if old system had a too old git-annex-shell and avoid bad behavior ## telehash TODO -## XMPP +## xmpp Reuse [[assistant/xmpp]] diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 987a9ffef..2d43953af 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -922,6 +922,10 @@ subdirectories). There are several parameters, provided by Haskell's tasty test framework. +* `remotedaemon` + + Detects when remotes have changed and fetches from them. + * `xmppgit` This command is used internally to perform git pulls over XMPP. |