From 9f4771eb3c3f29ea38bbfc70eaf78198ca948840 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 7 Mar 2018 15:15:23 -0400 Subject: implemented git-annex-shell p2pstdio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not yet used by git-annex, but this will allow faster transfers etc than using individual ssh connections and rsync. Not called git-annex-shell p2p, because git-annex p2p does something else and I don't want two subcommands with the same name between the two for sanity reasons. This commit was sponsored by Øyvind Andersen Holm. --- CHANGELOG | 1 + CmdLine/GitAnnexShell.hs | 17 +++++++-- CmdLine/GitAnnexShell/Checks.hs | 23 ++++++++--- Command/P2PStdIO.hs | 44 ++++++++++++++++++++++ P2P/IO.hs | 10 +++++ P2P/Protocol.hs | 2 + doc/design/p2p_protocol.mdwn | 6 ++- doc/git-annex-shell.mdwn | 6 +++ ...remotes_with_git-annex-shell_mass_protocol.mdwn | 11 +++++- git-annex.cabal | 1 + 10 files changed, 110 insertions(+), 11 deletions(-) create mode 100644 Command/P2PStdIO.hs diff --git a/CHANGELOG b/CHANGELOG index 174e4a3e1..6ec71de55 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -16,6 +16,7 @@ git-annex (6.20180228) UNRELEASED; urgency=medium in direct mode, it neglected to check that the content was actually present when locking it. This could cause git annex drop to remove the only copy of a file when it thought the tor remote had a copy. + * git-annex-shell: Added p2pstdio mode. -- Joey Hess Wed, 28 Feb 2018 11:53:03 -0400 diff --git a/CmdLine/GitAnnexShell.hs b/CmdLine/GitAnnexShell.hs index 154bfeb38..3dc31e602 100644 --- a/CmdLine/GitAnnexShell.hs +++ b/CmdLine/GitAnnexShell.hs @@ -1,6 +1,6 @@ {- git-annex-shell main program - - - Copyright 2010-2012 Joey Hess + - Copyright 2010-2018 Joey Hess - - Licensed under the GNU GPL version 3 or higher. 
-} @@ -28,6 +28,7 @@ import qualified Command.TransferInfo import qualified Command.Commit import qualified Command.NotifyChanges import qualified Command.GCryptSetup +import qualified Command.P2PStdIO cmds_readonly :: [Command] cmds_readonly = @@ -47,8 +48,18 @@ cmds_notreadonly = , Command.GCryptSetup.cmd ] +-- Commands that can operate readonly or not; they use checkNotReadOnly. +cmds_readonly_capable :: [Command] +cmds_readonly_capable = + [ gitAnnexShellCheck Command.P2PStdIO.cmd + ] + +cmds_readonly_safe :: [Command] +cmds_readonly_safe = cmds_readonly ++ cmds_readonly_capable + cmds :: [Command] -cmds = map (adddirparam . noMessages) (cmds_readonly ++ cmds_notreadonly) +cmds = map (adddirparam . noMessages) + (cmds_readonly ++ cmds_notreadonly ++ cmds_readonly_capable) where adddirparam c = c { cmdparamdesc = "DIRECTORY " ++ cmdparamdesc c } @@ -94,7 +105,7 @@ builtins = map cmdname cmds builtin :: String -> String -> [String] -> IO () builtin cmd dir params = do - unless (cmd `elem` map cmdname cmds_readonly) + unless (cmd `elem` map cmdname cmds_readonly_safe) checkNotReadOnly checkDirectory $ Just dir let (params', fieldparams, opts) = partitionParams params diff --git a/CmdLine/GitAnnexShell/Checks.hs b/CmdLine/GitAnnexShell/Checks.hs index fcbf14b24..3409884c0 100644 --- a/CmdLine/GitAnnexShell/Checks.hs +++ b/CmdLine/GitAnnexShell/Checks.hs @@ -14,17 +14,28 @@ import Annex.Init import Utility.UserInfo import Utility.Env +limitedEnv :: String +limitedEnv = "GIT_ANNEX_SHELL_LIMITED" + checkNotLimited :: IO () -checkNotLimited = checkEnv "GIT_ANNEX_SHELL_LIMITED" +checkNotLimited = checkEnv limitedEnv + +readOnlyEnv :: String +readOnlyEnv = "GIT_ANNEX_SHELL_READONLY" checkNotReadOnly :: IO () -checkNotReadOnly = checkEnv "GIT_ANNEX_SHELL_READONLY" +checkNotReadOnly = checkEnv readOnlyEnv checkEnv :: String -> IO () -checkEnv var = getEnv var >>= \case - Nothing -> noop - Just "" -> noop - Just _ -> giveup $ "Action blocked by " ++ var +checkEnv var = 
checkEnvSet var >>= \case + False -> noop + True -> giveup $ "Action blocked by " ++ var + +checkEnvSet :: String -> IO Bool +checkEnvSet var = getEnv var >>= return . \case + Nothing -> False + Just "" -> False + Just _ -> True checkDirectory :: Maybe FilePath -> IO () checkDirectory mdir = do diff --git a/Command/P2PStdIO.hs b/Command/P2PStdIO.hs new file mode 100644 index 000000000..f6e4ae0f0 --- /dev/null +++ b/Command/P2PStdIO.hs @@ -0,0 +1,44 @@ +{- git-annex command + - + - Copyright 2018 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.P2PStdIO where + +import Command +import P2P.IO +import P2P.Annex +import qualified P2P.Protocol as P2P +import Git.Types +import qualified Annex +import Annex.UUID +import qualified CmdLine.GitAnnexShell.Checks as Checks +import qualified CmdLine.GitAnnexShell.Fields as Fields +import Utility.AuthToken +import Utility.Tmp.Dir + +cmd :: Command +cmd = noMessages $ command "p2pstdio" SectionPlumbing + "communicate in P2P protocol over stdio" + paramNothing (withParams seek) + +seek :: CmdParams -> CommandSeek +seek = withNothing start + +start :: CommandStart +start = do + servermode <- liftIO $ + Checks.checkEnvSet Checks.readOnlyEnv >>= return . \case + True -> P2P.ServeReadOnly + False -> P2P.ServeReadWrite + theiruuid <- Fields.getField Fields.remoteUUID >>= \case + Nothing -> giveup "missing remoteuuid field" + Just u -> return (toUUID u) + myuuid <- getUUID + conn <- stdioP2PConnection <$> Annex.gitRepo + let server = P2P.serveAuthed servermode myuuid + runFullProto (Serving theiruuid Nothing) conn server >>= \case + Right () -> next $ next $ return True + Left e -> giveup e diff --git a/P2P/IO.hs b/P2P/IO.hs index 9ebb102f1..6cdc5b7d5 100644 --- a/P2P/IO.hs +++ b/P2P/IO.hs @@ -10,6 +10,7 @@ module P2P.IO ( RunProto , P2PConnection(..) 
+ , stdioP2PConnection , connectPeer , closeConnection , serveUnixSocket @@ -50,6 +51,15 @@ data P2PConnection = P2PConnection , connOhdl :: Handle } +-- P2PConnection using stdio. +stdioP2PConnection :: Git.Repo -> P2PConnection +stdioP2PConnection g = P2PConnection + { connRepo = g + , connCheckAuth = const False + , connIhdl = stdin + , connOhdl = stdout + } + -- Opens a connection to a peer. Does not authenticate with it. connectPeer :: Git.Repo -> P2PAddress -> IO P2PConnection connectPeer g (TorAnnex onionaddress onionport) = do diff --git a/P2P/Protocol.hs b/P2P/Protocol.hs index 81b2156cc..c750ae6ff 100644 --- a/P2P/Protocol.hs +++ b/P2P/Protocol.hs @@ -1,4 +1,6 @@ {- P2P protocol + - + - See doc/design/p2p_protocol.mdwn - - Copyright 2016 Joey Hess - diff --git a/doc/design/p2p_protocol.mdwn b/doc/design/p2p_protocol.mdwn index 23ac4052a..be3c5e6cb 100644 --- a/doc/design/p2p_protocol.mdwn +++ b/doc/design/p2p_protocol.mdwn @@ -3,7 +3,7 @@ communicate between peers. There's a common line-based serialization of the protocol, but other serializations are also possible. The line-based serialization is spoken -by [[git-annex-shell], and by [[git-annex-remotedaemon]] when serving tor. +by [[git-annex-shell]], and by git-annex over tor. One peer is known as the client, and is the peer that initiates the connection. The other peer is known as the server, and is the peer that the @@ -29,6 +29,10 @@ connection. AUTH_SUCCESS UUID AUTH_FAILURE +Note that authentication does not guarantee that the client is talking to +who they expect to be talking to. This, and encryption of the connection, +are handled at a lower level. + ## Errors Either the client or the server may send an error message at any diff --git a/doc/git-annex-shell.mdwn b/doc/git-annex-shell.mdwn index 167f54012..cf72e091b 100644 --- a/doc/git-annex-shell.mdwn +++ b/doc/git-annex-shell.mdwn @@ -90,6 +90,12 @@ first "/~/" or "/~user/" is expanded to the specified home directory. 
Sets up a repository as a gcrypt repository. +* p2pstdio directory + + This causes git-annex-shell to communicate using the git-annex p2p + protocol over stdio. When supported by git-annex-shell, this allows + multiple actions to be run over a single connection, improving speed. + # OPTIONS Most options are the same as in git-annex. The ones specific diff --git a/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn b/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn index dd6be9a30..ff4b8c59d 100644 --- a/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn +++ b/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn @@ -23,7 +23,7 @@ letting git-annex-shell on the remote work that out. So, it seems better to not use sftp, and instead roll our own simple file transfer protocol. -So, "git-annex-shell -c multi" would speak a protocol over stdin/stdout +So, "git-annex-shell -c p2pstdio" would speak a protocol over stdin/stdout that essentially contains the commands inannex, lockcontent, dropkey, recvkey, and sendkey. @@ -31,3 +31,12 @@ P2P.Protocol already contains such a similar protocol, used over tor. That protocol even supports resuming interrupted transfers. It has stuff including auth that this wouldn't need, but it would be good to unify with it as much as possible. + +---- + +Implementation todos: + +* git-annex-shell p2pstdio currently always verifies content it receives. + git-annex-shell recvkey has a speed optimisation, when it's told the file + being sent is locked, it can avoid an expensive verification. +* Maybe similar for transfers in the other direction? diff --git a/git-annex.cabal b/git-annex.cabal index 994e01570..6a8aa490a 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -754,6 +754,7 @@ Executable git-annex Command.NotifyChanges Command.NumCopies Command.P2P + Command.P2PStdIO Command.PostReceive Command.PreCommit Command.Proxy -- cgit v1.2.3