aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2018-03-07 15:15:23 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2018-03-07 15:38:01 -0400
commit9f4771eb3c3f29ea38bbfc70eaf78198ca948840 (patch)
tree254a904d70a593da0c4bcfc4d948d4f9a07d19c2
parentb7aa7445a5d97d6f43fbea26ae2ee1773744ebc9 (diff)
implemented git-annex-shell p2pstdio
Not yet used by git-annex, but this will allow faster transfers etc than using individual ssh connections and rsync. Not called git-annex-shell p2p, because git-annex p2p does something else and I don't want two subcommands with the same name between the two for sanity reasons. This commit was sponsored by Øyvind Andersen Holm.
-rw-r--r--CHANGELOG1
-rw-r--r--CmdLine/GitAnnexShell.hs17
-rw-r--r--CmdLine/GitAnnexShell/Checks.hs23
-rw-r--r--Command/P2PStdIO.hs44
-rw-r--r--P2P/IO.hs10
-rw-r--r--P2P/Protocol.hs2
-rw-r--r--doc/design/p2p_protocol.mdwn6
-rw-r--r--doc/git-annex-shell.mdwn6
-rw-r--r--doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn11
-rw-r--r--git-annex.cabal1
10 files changed, 110 insertions, 11 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 174e4a3e1..6ec71de55 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -16,6 +16,7 @@ git-annex (6.20180228) UNRELEASED; urgency=medium
in direct mode, it neglected to check that the content was actually
present when locking it. This could cause git annex drop to remove
the only copy of a file when it thought the tor remote had a copy.
+ * git-annex-shell: Added p2pstdio mode.
-- Joey Hess <id@joeyh.name> Wed, 28 Feb 2018 11:53:03 -0400
diff --git a/CmdLine/GitAnnexShell.hs b/CmdLine/GitAnnexShell.hs
index 154bfeb38..3dc31e602 100644
--- a/CmdLine/GitAnnexShell.hs
+++ b/CmdLine/GitAnnexShell.hs
@@ -1,6 +1,6 @@
{- git-annex-shell main program
-
- - Copyright 2010-2012 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -28,6 +28,7 @@ import qualified Command.TransferInfo
import qualified Command.Commit
import qualified Command.NotifyChanges
import qualified Command.GCryptSetup
+import qualified Command.P2PStdIO
cmds_readonly :: [Command]
cmds_readonly =
@@ -47,8 +48,18 @@ cmds_notreadonly =
, Command.GCryptSetup.cmd
]
+-- Commands that can operate readonly or not; they check readOnlyEnv themselves.
+cmds_readonly_capable :: [Command]
+cmds_readonly_capable =
+ [ gitAnnexShellCheck Command.P2PStdIO.cmd
+ ]
+
+cmds_readonly_safe :: [Command]
+cmds_readonly_safe = cmds_readonly ++ cmds_readonly_capable
+
cmds :: [Command]
-cmds = map (adddirparam . noMessages) (cmds_readonly ++ cmds_notreadonly)
+cmds = map (adddirparam . noMessages)
+ (cmds_readonly ++ cmds_notreadonly ++ cmds_readonly_capable)
where
adddirparam c = c { cmdparamdesc = "DIRECTORY " ++ cmdparamdesc c }
@@ -94,7 +105,7 @@ builtins = map cmdname cmds
builtin :: String -> String -> [String] -> IO ()
builtin cmd dir params = do
- unless (cmd `elem` map cmdname cmds_readonly)
+ unless (cmd `elem` map cmdname cmds_readonly_safe)
checkNotReadOnly
checkDirectory $ Just dir
let (params', fieldparams, opts) = partitionParams params
diff --git a/CmdLine/GitAnnexShell/Checks.hs b/CmdLine/GitAnnexShell/Checks.hs
index fcbf14b24..3409884c0 100644
--- a/CmdLine/GitAnnexShell/Checks.hs
+++ b/CmdLine/GitAnnexShell/Checks.hs
@@ -14,17 +14,28 @@ import Annex.Init
import Utility.UserInfo
import Utility.Env
+limitedEnv :: String
+limitedEnv = "GIT_ANNEX_SHELL_LIMITED"
+
checkNotLimited :: IO ()
-checkNotLimited = checkEnv "GIT_ANNEX_SHELL_LIMITED"
+checkNotLimited = checkEnv limitedEnv
+
+readOnlyEnv :: String
+readOnlyEnv = "GIT_ANNEX_SHELL_READONLY"
checkNotReadOnly :: IO ()
-checkNotReadOnly = checkEnv "GIT_ANNEX_SHELL_READONLY"
+checkNotReadOnly = checkEnv readOnlyEnv
checkEnv :: String -> IO ()
-checkEnv var = getEnv var >>= \case
- Nothing -> noop
- Just "" -> noop
- Just _ -> giveup $ "Action blocked by " ++ var
+checkEnv var = checkEnvSet var >>= \case
+ False -> noop
+ True -> giveup $ "Action blocked by " ++ var
+
+checkEnvSet :: String -> IO Bool
+checkEnvSet var = getEnv var >>= return . \case
+ Nothing -> False
+ Just "" -> False
+ Just _ -> True
checkDirectory :: Maybe FilePath -> IO ()
checkDirectory mdir = do
diff --git a/Command/P2PStdIO.hs b/Command/P2PStdIO.hs
new file mode 100644
index 000000000..f6e4ae0f0
--- /dev/null
+++ b/Command/P2PStdIO.hs
@@ -0,0 +1,44 @@
+{- git-annex command
+ -
+ - Copyright 2018 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Command.P2PStdIO where
+
+import Command
+import P2P.IO
+import P2P.Annex
+import qualified P2P.Protocol as P2P
+import Git.Types
+import qualified Annex
+import Annex.UUID
+import qualified CmdLine.GitAnnexShell.Checks as Checks
+import qualified CmdLine.GitAnnexShell.Fields as Fields
+import Utility.AuthToken
+import Utility.Tmp.Dir
+
+cmd :: Command
+cmd = noMessages $ command "p2pstdio" SectionPlumbing
+ "communicate in P2P protocol over stdio"
+ paramNothing (withParams seek)
+
+seek :: CmdParams -> CommandSeek
+seek = withNothing start
+
+start :: CommandStart
+start = do
+ servermode <- liftIO $
+ Checks.checkEnvSet Checks.readOnlyEnv >>= return . \case
+ True -> P2P.ServeReadOnly
+ False -> P2P.ServeReadWrite
+ theiruuid <- Fields.getField Fields.remoteUUID >>= \case
+ Nothing -> giveup "missing remoteuuid field"
+ Just u -> return (toUUID u)
+ myuuid <- getUUID
+ conn <- stdioP2PConnection <$> Annex.gitRepo
+ let server = P2P.serveAuthed servermode myuuid
+ runFullProto (Serving theiruuid Nothing) conn server >>= \case
+ Right () -> next $ next $ return True
+ Left e -> giveup e
diff --git a/P2P/IO.hs b/P2P/IO.hs
index 9ebb102f1..6cdc5b7d5 100644
--- a/P2P/IO.hs
+++ b/P2P/IO.hs
@@ -10,6 +10,7 @@
module P2P.IO
( RunProto
, P2PConnection(..)
+ , stdioP2PConnection
, connectPeer
, closeConnection
, serveUnixSocket
@@ -50,6 +51,15 @@ data P2PConnection = P2PConnection
, connOhdl :: Handle
}
+-- P2PConnection using stdio.
+stdioP2PConnection :: Git.Repo -> P2PConnection
+stdioP2PConnection g = P2PConnection
+ { connRepo = g
+ , connCheckAuth = const False
+ , connIhdl = stdin
+ , connOhdl = stdout
+ }
+
-- Opens a connection to a peer. Does not authenticate with it.
connectPeer :: Git.Repo -> P2PAddress -> IO P2PConnection
connectPeer g (TorAnnex onionaddress onionport) = do
diff --git a/P2P/Protocol.hs b/P2P/Protocol.hs
index 81b2156cc..c750ae6ff 100644
--- a/P2P/Protocol.hs
+++ b/P2P/Protocol.hs
@@ -1,5 +1,7 @@
{- P2P protocol
-
+ - See doc/design/p2p_protocol.mdwn
+ -
- Copyright 2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
diff --git a/doc/design/p2p_protocol.mdwn b/doc/design/p2p_protocol.mdwn
index 23ac4052a..be3c5e6cb 100644
--- a/doc/design/p2p_protocol.mdwn
+++ b/doc/design/p2p_protocol.mdwn
@@ -3,7 +3,7 @@ communicate between peers.
There's a common line-based serialization of the protocol, but other
serializations are also possible. The line-based serialization is spoken
-by [[git-annex-shell], and by [[git-annex-remotedaemon]] when serving tor.
+by [[git-annex-shell]], and by git-annex over tor.
One peer is known as the client, and is the peer that initiates the
connection. The other peer is known as the server, and is the peer that the
@@ -29,6 +29,10 @@ connection.
AUTH_SUCCESS UUID
AUTH_FAILURE
+Note that authentication does not guarantee that the client is talking to
+who they expect to be talking to. This, and encryption of the connection,
+are handled at a lower level.
+
## Errors
Either the client or the server may send an error message at any
diff --git a/doc/git-annex-shell.mdwn b/doc/git-annex-shell.mdwn
index 167f54012..cf72e091b 100644
--- a/doc/git-annex-shell.mdwn
+++ b/doc/git-annex-shell.mdwn
@@ -90,6 +90,12 @@ first "/~/" or "/~user/" is expanded to the specified home directory.
Sets up a repository as a gcrypt repository.
+* p2pstdio directory
+
+ This causes git-annex-shell to communicate using the git-annex p2p
+ protocol over stdio. When supported by git-annex-shell, this allows
+ multiple actions to be run over a single connection, improving speed.
+
# OPTIONS
Most options are the same as in git-annex. The ones specific
diff --git a/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn b/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn
index dd6be9a30..ff4b8c59d 100644
--- a/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn
+++ b/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn
@@ -23,7 +23,7 @@ letting git-annex-shell on the remote work that out.
So, it seems better to not use sftp, and instead roll our own simple
file transfer protocol.
-So, "git-annex-shell -c multi" would speak a protocol over stdin/stdout
+So, "git-annex-shell -c p2pstdio" would speak a protocol over stdin/stdout
that essentially contains the commands inannex, lockcontent, dropkey,
recvkey, and sendkey.
@@ -31,3 +31,12 @@ P2P.Protocol already contains such a similar protocol, used over tor.
That protocol even supports resuming interrupted transfers.
It has stuff including auth that this wouldn't need, but it would be
good to unify with it as much as possible.
+
+----
+
+Implementation todos:
+
+* git-annex-shell p2pstdio currently always verifies content it receives.
+ git-annex-shell recvkey has a speed optimisation: when it's told the file
+ being sent is locked, it can avoid an expensive verification.
+* Maybe similar for transfers in the other direction?
diff --git a/git-annex.cabal b/git-annex.cabal
index 994e01570..6a8aa490a 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -754,6 +754,7 @@ Executable git-annex
Command.NotifyChanges
Command.NumCopies
Command.P2P
+ Command.P2PStdIO
Command.PostReceive
Command.PreCommit
Command.Proxy