summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2012-01-20 15:34:52 -0400
committerGravatar Joey Hess <joey@kitenet.net>2012-01-20 17:14:56 -0400
commit47250a153a6c5a2864fec15fb136290683aeb1c6 (patch)
tree5c17ef6c035d6c919403e52e61ed0d37c2bfd824
parent25f998679cd68cd4bb9b320998253f1b2ae23315 (diff)
ssh connection caching
Ssh connection caching is now enabled automatically by git-annex. Only one ssh connection is made to each host per git-annex run, which can speed some things up a lot, as well as avoiding repeated password prompts. Concurrent git-annex processes also share ssh connections. Cached ssh connections are shut down when git-annex exits. Note: The rsync special remote does not yet participate in the ssh connection caching.
-rw-r--r--Annex.hs3
-rw-r--r--Annex/LockPool.hs43
-rw-r--r--Annex/Ssh.hs107
-rw-r--r--CmdLine.hs2
-rw-r--r--Locations.hs5
-rw-r--r--Remote/Helper/Ssh.hs14
-rw-r--r--debian/changelog5
-rw-r--r--doc/tips/using_git_annex_with_no_fixed_hostname_and_optimising_ssh.mdwn13
-rw-r--r--doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn4
9 files changed, 173 insertions, 23 deletions
diff --git a/Annex.hs b/Annex.hs
index b365132e5..3b79ea270 100644
--- a/Annex.hs
+++ b/Annex.hs
@@ -29,6 +29,7 @@ module Annex (
import Control.Monad.State
import Control.Monad.Trans.Control (StM, MonadBaseControl, liftBaseWith, restoreM)
import Control.Monad.Base (liftBase, MonadBase)
+import System.Posix.Types (Fd)
import Common
import qualified Git
@@ -86,6 +87,7 @@ data AnnexState = AnnexState
, forcetrust :: TrustMap
, trustmap :: Maybe TrustMap
, ciphers :: M.Map EncryptedCipher Cipher
+ , lockpool :: M.Map FilePath Fd
, flags :: M.Map String Bool
, fields :: M.Map String String
}
@@ -108,6 +110,7 @@ newState gitrepo = AnnexState
, forcetrust = M.empty
, trustmap = Nothing
, ciphers = M.empty
+ , lockpool = M.empty
, flags = M.empty
, fields = M.empty
}
diff --git a/Annex/LockPool.hs b/Annex/LockPool.hs
new file mode 100644
index 000000000..3fede5739
--- /dev/null
+++ b/Annex/LockPool.hs
@@ -0,0 +1,43 @@
+{- git-annex lock pool
+ -
+ - Copyright 2012 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.LockPool where
+
+import qualified Data.Map as M
+import System.Posix.Types (Fd)
+
+import Common.Annex
+import Annex
+
+{- Create a specified lock file, and takes a shared lock. -}
+lockFile :: FilePath -> Annex ()
+lockFile file = go =<< fromPool file
+ where
+ go (Just _) = return () -- already locked
+ go Nothing = do
+ fd <- liftIO $ openFd file ReadOnly (Just stdFileMode) defaultFileFlags
+ liftIO $ waitToSetLock fd (ReadLock, AbsoluteSeek, 0, 0)
+ changePool $ M.insert file fd
+
+unlockFile :: FilePath -> Annex ()
+unlockFile file = go =<< fromPool file
+ where
+ go Nothing = return ()
+ go (Just fd) = do
+ liftIO $ closeFd fd
+ changePool $ M.delete file
+
+getPool :: Annex (M.Map FilePath Fd)
+getPool = getState lockpool
+
+fromPool :: FilePath -> Annex (Maybe Fd)
+fromPool file = M.lookup file <$> getPool
+
+changePool :: (M.Map FilePath Fd -> M.Map FilePath Fd) -> Annex ()
+changePool a = do
+ m <- getPool
+ changeState $ \s -> s { lockpool = a m }
diff --git a/Annex/Ssh.hs b/Annex/Ssh.hs
new file mode 100644
index 000000000..cd832a373
--- /dev/null
+++ b/Annex/Ssh.hs
@@ -0,0 +1,107 @@
+{- git-annex ssh interface, with connection caching
+ -
+ - Copyright 2012 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.Ssh (
+ sshParams,
+ sshCleanup,
+) where
+
+import qualified Data.Map as M
+import System.IO.Error (try)
+
+import Common.Annex
+import Annex.LockPool
+
+{- Generates parameters to ssh to a given host (or user@host) on a given
+ - port, with connection caching. -}
+sshParams :: (String, Maybe Integer) -> Annex [CommandParam]
+sshParams (host, port) = do
+ cleanstale
+ (socketfile, params) <- sshInfo (host, port)
+ liftIO $ createDirectoryIfMissing True $ parentDir socketfile
+ lockFile $ socket2lock socketfile
+ return params
+ where
+ -- If the lock pool is empty, this is the first ssh of this
+ -- run. There could be stale ssh connections hanging around
+ -- from a previous git-annex run that was interrupted.
+ cleanstale = whenM (null . filter isLock . M.keys <$> getPool) $
+ sshCleanup
+
+sshInfo :: (String, Maybe Integer) -> Annex (FilePath, [CommandParam])
+sshInfo (host, port) = do
+ dir <- fromRepo $ gitAnnexSshDir
+ let socketfile = dir </> hostport2socket host port
+ return $ (socketfile, cacheParams socketfile ++ portParams port ++ [Param host])
+
+cacheParams :: FilePath -> [CommandParam]
+cacheParams socketfile =
+ [ Param "-S", Param socketfile
+ , Params "-o ControlMaster=auto -o ControlPersist=yes"
+ ]
+
+portParams :: Maybe Integer -> [CommandParam]
+portParams Nothing = []
+portParams (Just port) = [Param "-p", Param $ show port]
+
+{- Stop any unused ssh processes. -}
+sshCleanup :: Annex ()
+sshCleanup = do
+ dir <- fromRepo $ gitAnnexSshDir
+ liftIO $ createDirectoryIfMissing True dir
+ sockets <- filter (not . isLock) <$> liftIO (dirContents dir)
+ forM_ sockets cleanup
+ where
+ cleanup socketfile = do
+ -- Drop any shared lock we have, and take an
+ -- exclusive lock, without blocking. If the lock
+ -- succeeds, nothing is using this ssh, and it can
+ -- be stopped.
+ let lockfile = socket2lock socketfile
+ unlockFile lockfile
+ fd <- liftIO $ openFd lockfile ReadWrite (Just stdFileMode) defaultFileFlags
+ v <- liftIO $ try $ setLock fd (WriteLock, AbsoluteSeek, 0, 0)
+ case v of
+ Left _ -> return ()
+ Right _ -> stopssh socketfile
+ liftIO $ closeFd fd
+ stopssh socketfile = do
+ (_, params) <- sshInfo $ socket2hostport socketfile
+ _ <- liftIO $ do
+ -- "ssh -O stop" is noisy on stderr even with -q
+ let cmd = unwords $ toCommand $
+ [ Params "-O stop"
+ ] ++ params
+ _ <- boolSystem "sh"
+ [ Param "-c"
+ , Param $ "ssh " ++ cmd ++ " >/dev/null 2>/dev/null"
+ ]
+ --try $ removeFile socketfile
+ return ()
+ -- Cannot remove the lock file; other processes may
+ -- be waiting on our exclusive lock to use it.
+ return ()
+
+hostport2socket :: String -> Maybe Integer -> FilePath
+hostport2socket host Nothing = host
+hostport2socket host (Just port) = host ++ "!" ++ show port
+
+socket2hostport :: FilePath -> (String, Maybe Integer)
+socket2hostport socket
+ | null p = (h, Nothing)
+ | otherwise = (h, readMaybe p)
+ where
+ (h, p) = separate (== '!') $ takeFileName socket
+
+socket2lock :: FilePath -> FilePath
+socket2lock socket = socket ++ lockExt
+
+isLock :: FilePath -> Bool
+isLock f = lockExt `isSuffixOf` f
+
+lockExt :: String
+lockExt = ".lock"
diff --git a/CmdLine.hs b/CmdLine.hs
index 68157a01a..29b95d01b 100644
--- a/CmdLine.hs
+++ b/CmdLine.hs
@@ -22,6 +22,7 @@ import qualified Annex.Queue
import qualified Git
import qualified Git.Command
import Annex.Content
+import Annex.Ssh
import Command
type Params = [String]
@@ -92,4 +93,5 @@ shutdown :: Annex Bool
shutdown = do
saveState
liftIO Git.Command.reap -- zombies from long-running git processes
+ sshCleanup -- ssh connection caching
return True
diff --git a/Locations.hs b/Locations.hs
index 73a2473b5..03d6deb1d 100644
--- a/Locations.hs
+++ b/Locations.hs
@@ -22,6 +22,7 @@ module Locations (
gitAnnexJournalLock,
gitAnnexIndex,
gitAnnexIndexLock,
+ gitAnnexSshDir,
isLinkToAnnex,
annexHashes,
hashDirMixed,
@@ -142,6 +143,10 @@ gitAnnexIndex r = gitAnnexDir r </> "index"
gitAnnexIndexLock :: Git.Repo -> FilePath
gitAnnexIndexLock r = gitAnnexDir r </> "index.lck"
+{- .git/annex/ssh/ is used for ssh connection caching -}
+gitAnnexSshDir :: Git.Repo -> FilePath
+gitAnnexSshDir r = addTrailingPathSeparator $ gitAnnexDir r </> "ssh"
+
{- Checks a symlink target to see if it appears to point to annexed content. -}
isLinkToAnnex :: FilePath -> Bool
isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s
diff --git a/Remote/Helper/Ssh.hs b/Remote/Helper/Ssh.hs
index 7c5eeddb8..88b29fdb6 100644
--- a/Remote/Helper/Ssh.hs
+++ b/Remote/Helper/Ssh.hs
@@ -7,25 +7,21 @@
module Remote.Helper.Ssh where
-import Common
+import Common.Annex
import qualified Git
import qualified Git.Url
-import Types
import Config
import Annex.UUID
+import Annex.Ssh
{- Generates parameters to ssh to a repository's host and run a command.
- Caller is responsible for doing any neccessary shellEscaping of the
- passed command. -}
sshToRepo :: Git.Repo -> [CommandParam] -> Annex [CommandParam]
sshToRepo repo sshcmd = do
- s <- getConfig repo "ssh-options" ""
- let sshoptions = map Param (words s)
- let sshport = case Git.Url.port repo of
- Nothing -> []
- Just p -> [Param "-p", Param (show p)]
- let sshhost = Param $ Git.Url.hostuser repo
- return $ sshoptions ++ sshport ++ [sshhost] ++ sshcmd
+ opts <- map Param . words <$> getConfig repo "ssh-options" ""
+ params <- sshParams (Git.Url.hostuser repo, Git.Url.port repo)
+ return $ opts ++ params ++ sshcmd
{- Generates parameters to run a git-annex-shell command on a remote
- repository. -}
diff --git a/debian/changelog b/debian/changelog
index 5adba128f..f5fc107c7 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -10,6 +10,11 @@ git-annex (3.20120117) UNRELEASED; urgency=low
previous release!
* fsck --from remote --fast: Avoids expensive file transfers, at the
expense of checking file size and/or contents.
+ * Ssh connection caching is now enabled automatically by git-annex.
+ Only one ssh connection is made to each host per git-annex run, which
+ can speed some things up a lot, as well as avoiding repeated password
+ prompts. Concurrent git-annex processes also share ssh connections.
+ Cached ssh connections are shut down when git-annex exits.
-- Joey Hess <joeyh@debian.org> Thu, 19 Jan 2012 15:12:03 -0400
diff --git a/doc/tips/using_git_annex_with_no_fixed_hostname_and_optimising_ssh.mdwn b/doc/tips/using_git_annex_with_no_fixed_hostname_and_optimising_ssh.mdwn
index 8fb2bf9db..594d8c480 100644
--- a/doc/tips/using_git_annex_with_no_fixed_hostname_and_optimising_ssh.mdwn
+++ b/doc/tips/using_git_annex_with_no_fixed_hostname_and_optimising_ssh.mdwn
@@ -57,16 +57,3 @@ b) From the desktop add the remote
So now you can work on the train, pop on the wifi at work upon arrival, and sync up with a `git pull && git annex get`.
An alternative solution may be to use direct tunnels over Openvpn.
-
-## Optimising SSH
-
-Running a `git annex get .`, at least in the version I have, creates a new SSH connection for every file transfer (maybe this should be a feature request?)
-
-Lot's of new small files in an _annex_ cause lot's of connections to be made quickly: this is an relatively expensive overhead and is enough for connection limiting to start in my case. The process can be made much faster by using SSH's connection sharing capabilities. An SSH config like this should do it:
-
- # Global Settings
- ControlMaster auto
- ControlPersist 30
- ControlPath ~/.ssh/master-%r@%h:%p
-
-This will create a master connection for sharing if one isn't present, maintain it for 30 seconds after closing down the connection (just-in-cases') and automatically use the master connection for subsequent connections. Wins all round!
diff --git a/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn b/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn
index a047370ac..341a9afa4 100644
--- a/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn
+++ b/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn
@@ -15,7 +15,9 @@ Simple, when performing various git annex command over ssh, in particular a mult
>> pid. Then at shutdown, run `ssh -O exit` on each such socket.
>>
>> Complicated slightly by not doing this if the user has already set up
->> more broad ssh connection caching. --[[Joey]]
+>> more broad ssh connection caching.
+>>
+>> [[done]]! --[[Joey]]
---