summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-04-12 16:32:59 -0400
committerGravatar Joey Hess <joey@kitenet.net>2014-04-12 16:32:59 -0400
commit0f1ea37a68535b95c56e1e142ecc8db1ac6b43dc (patch)
tree5527c4de0f0a561afe5e358b7b2e8f491d29146d
parent102e182eac9b95e31fb71830fe5ce5a502e93d92 (diff)
remotedaemon: When network connection is lost, close all cached ssh connections.
This commit was sponsored by Cedric Staub.
-rw-r--r--Annex/Ssh.hs55
-rw-r--r--Assistant/Threads/NetWatcher.hs2
-rw-r--r--RemoteDaemon/Core.hs18
-rw-r--r--RemoteDaemon/Transport/Ssh.hs1
-rw-r--r--RemoteDaemon/Types.hs3
-rw-r--r--debian/changelog2
-rw-r--r--doc/design/git-remote-daemon.mdwn29
7 files changed, 75 insertions, 35 deletions
diff --git a/Annex/Ssh.hs b/Annex/Ssh.hs
index fab25c462..06e3ac449 100644
--- a/Annex/Ssh.hs
+++ b/Annex/Ssh.hs
@@ -11,6 +11,7 @@ module Annex.Ssh (
sshCachingOptions,
sshCacheDir,
sshReadPort,
+ forceSshCleanup,
sshCachingEnv,
sshCachingTo,
inRepoWithSshCachingTo,
@@ -124,21 +125,27 @@ prepSocket socketfile = do
liftIO $ createDirectoryIfMissing True $ parentDir socketfile
lockFile $ socket2lock socketfile
-{- Stop any unused ssh processes. -}
+enumSocketFiles :: Annex [FilePath]
+enumSocketFiles = go =<< sshCacheDir
+ where
+ go Nothing = return []
+ go (Just dir) = liftIO $ filter (not . isLock)
+ <$> catchDefaultIO [] (dirContents dir)
+
+{- Stop any unused ssh connection caching processes. -}
sshCleanup :: Annex ()
-sshCleanup = go =<< sshCacheDir
+sshCleanup = mapM_ cleanup =<< enumSocketFiles
where
- go Nothing = noop
- go (Just dir) = do
- sockets <- liftIO $ filter (not . isLock)
- <$> catchDefaultIO [] (dirContents dir)
- forM_ sockets cleanup
cleanup socketfile = do
#ifndef mingw32_HOST_OS
-- Drop any shared lock we have, and take an
-- exclusive lock, without blocking. If the lock
-- succeeds, nothing is using this ssh, and it can
-- be stopped.
+ --
+ -- After ssh is stopped cannot remove the lock file;
+ -- other processes may be waiting on our exclusive
+ -- lock to use it.
let lockfile = socket2lock socketfile
unlockFile lockfile
mode <- annexFileMode
@@ -148,24 +155,28 @@ sshCleanup = go =<< sshCacheDir
setLock fd (WriteLock, AbsoluteSeek, 0, 0)
case v of
Left _ -> noop
- Right _ -> stopssh socketfile
+ Right _ -> forceStopSsh socketfile
liftIO $ closeFd fd
#else
- stopssh socketfile
+ forceStopSsh socketfile
#endif
- stopssh socketfile = do
- let (dir, base) = splitFileName socketfile
- let params = sshConnectionCachingParams base
- -- "ssh -O stop" is noisy on stderr even with -q
- void $ liftIO $ catchMaybeIO $
- withQuietOutput createProcessSuccess $
- (proc "ssh" $ toCommand $
- [ Params "-O stop"
- ] ++ params ++ [Param "localhost"])
- { cwd = Just dir }
- liftIO $ nukeFile socketfile
- -- Cannot remove the lock file; other processes may
- -- be waiting on our exclusive lock to use it.
+
+{- Stop all ssh connection caching processes, even when they're in use. -}
+forceSshCleanup :: Annex ()
+forceSshCleanup = mapM_ forceStopSsh =<< enumSocketFiles
+
+forceStopSsh :: FilePath -> Annex ()
+forceStopSsh socketfile = do
+ let (dir, base) = splitFileName socketfile
+ let params = sshConnectionCachingParams base
+ -- "ssh -O stop" is noisy on stderr even with -q
+ void $ liftIO $ catchMaybeIO $
+ withQuietOutput createProcessSuccess $
+ (proc "ssh" $ toCommand $
+ [ Params "-O stop"
+ ] ++ params ++ [Param "localhost"])
+ { cwd = Just dir }
+ liftIO $ nukeFile socketfile
{- This needs to be as short as possible, due to limitations on the length
- of the path to a socket file. At the same time, it needs to be unique
diff --git a/Assistant/Threads/NetWatcher.hs b/Assistant/Threads/NetWatcher.hs
index 912893b87..0ff605edc 100644
--- a/Assistant/Threads/NetWatcher.hs
+++ b/Assistant/Threads/NetWatcher.hs
@@ -71,7 +71,7 @@ dbusThread = do
)
handleconn = do
debug ["detected network connection"]
- sendRemoteControl PAUSE
+ sendRemoteControl LOSTNET
notifyNetMessagerRestart
handleConnection
sendRemoteControl RESUME
diff --git a/RemoteDaemon/Core.hs b/RemoteDaemon/Core.hs
index 0c2937103..b3cfe67b2 100644
--- a/RemoteDaemon/Core.hs
+++ b/RemoteDaemon/Core.hs
@@ -18,6 +18,7 @@ import qualified Git.Types as Git
import qualified Git.CurrentRepo
import Utility.SimpleProtocol
import Config
+import Annex.Ssh
import Control.Concurrent.Async
import Control.Concurrent
@@ -65,12 +66,19 @@ runController ichan ochan = do
let common = M.intersection m m'
let new = M.difference m' m
let old = M.difference m m'
- stoprunning old
+ broadcast STOP old
unless paused $
startrunning new
go h paused (M.union common new)
+ LOSTNET -> do
+ -- force close all cached ssh connections
+ -- (done here so that if there are multiple
+ -- ssh remotes, it's only done once)
+ liftAnnex h forceSshCleanup
+ broadcast LOSTNET m
+ go h True M.empty
PAUSE -> do
- stoprunning m
+ broadcast STOP m
go h True M.empty
RESUME -> do
when paused $
@@ -89,9 +97,9 @@ runController ichan ochan = do
startrunning m = forM_ (M.elems m) startrunning'
startrunning' (transport, _) = void $ async transport
- -- Ask the transport nicely to stop.
- stoprunning m = forM_ (M.elems m) stoprunning'
- stoprunning' (_, c) = writeChan c STOP
+ broadcast msg m = forM_ (M.elems m) send
+ where
+ send (_, c) = writeChan c msg
-- Generates a map with a transport for each supported remote in the git repo,
-- except those that have annex.sync = false
diff --git a/RemoteDaemon/Transport/Ssh.hs b/RemoteDaemon/Transport/Ssh.hs
index d6150bbce..ba03a2589 100644
--- a/RemoteDaemon/Transport/Ssh.hs
+++ b/RemoteDaemon/Transport/Ssh.hs
@@ -84,6 +84,7 @@ transport' r url transporthandle ichan ochan = do
msg <- readChan ichan
case msg of
STOP -> return Stopping
+ LOSTNET -> return Stopping
_ -> handlecontrol
-- Old versions of git-annex-shell that do not support
diff --git a/RemoteDaemon/Types.hs b/RemoteDaemon/Types.hs
index eef7389cc..aff910120 100644
--- a/RemoteDaemon/Types.hs
+++ b/RemoteDaemon/Types.hs
@@ -42,6 +42,7 @@ data Emitted
-- Messages that the deamon consumes.
data Consumed
= PAUSE
+ | LOSTNET
| RESUME
| CHANGED RefList
| RELOAD
@@ -63,6 +64,7 @@ instance Proto.Sendable Emitted where
instance Proto.Sendable Consumed where
formatMessage PAUSE = ["PAUSE"]
+ formatMessage LOSTNET = ["LOSTNET"]
formatMessage RESUME = ["RESUME"]
formatMessage (CHANGED refs) =["CHANGED", Proto.serialize refs]
formatMessage RELOAD = ["RELOAD"]
@@ -78,6 +80,7 @@ instance Proto.Receivable Emitted where
instance Proto.Receivable Consumed where
parseCommand "PAUSE" = Proto.parse0 PAUSE
+ parseCommand "LOSTNET" = Proto.parse0 LOSTNET
parseCommand "RESUME" = Proto.parse0 RESUME
parseCommand "CHANGED" = Proto.parse1 CHANGED
parseCommand "RELOAD" = Proto.parse0 RELOAD
diff --git a/debian/changelog b/debian/changelog
index 1b5b39de8..4c1b20df7 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -10,6 +10,8 @@ git-annex (5.20140413) UNRELEASED; urgency=medium
set up.
* sync, assistant, remotedaemon: Use ssh connection caching for git pushes
and pulls.
+ * remotedaemon: When network connection is lost, close all cached ssh
+ connections.
* Improve handling on monthly/yearly scheduling.
-- Joey Hess <joeyh@debian.org> Fri, 11 Apr 2014 21:33:35 -0400
diff --git a/doc/design/git-remote-daemon.mdwn b/doc/design/git-remote-daemon.mdwn
index ca3a59fce..a3d07eaa6 100644
--- a/doc/design/git-remote-daemon.mdwn
+++ b/doc/design/git-remote-daemon.mdwn
@@ -95,18 +95,18 @@ the webapp.
* `PAUSE`
- This indicates that the network connection has gone down,
- or the user has requested a pause.
+ The user has requested a pause.
git-remote-daemon should close connections and idle.
- Affects all remotes.
+* `LOSTNET`
-* `RESUME`
+ The network connection has been lost.
+ git-remote-daemon should close connections and idle.
- This indicates that the network connection has come back up, or the user
- has asked it to run again. Start back up network connections.
+* `RESUME`
- Affects all remotes.
+ Undoes PAUSE or DISCONNECTED.
+ Start back up network connections.
* `CHANGED ref ...`
@@ -170,6 +170,21 @@ TODO:
* Remote system might not be available. Find a smart way to detect it,
ideally w/o generating network traffic. One way might be to check
if the ssh connection caching control socket exists, for example.
+* Now that ssh connection caching is enabled for git push/pull in sync,
+ there's the possibility that a stale ssh connection may linger when
+ changing network connections, and so attempts to use it will stall.
+ (This was already a potential issue with transfers, which already
+ used the caching.)
+
+ One option is ssh's ServerAliveCountMax, which will make a dead
+ ssh connection disconnect after approx 45 seconds, per ssh manual.
+ It would need to be enabled by setting ServerAliveInterval=15.
+ And this would add network traffic..
+
+ Another option is to disable all cached connections when the network
+ connection changes. This would handle *most* cases. The case
+ not handled is eg, my dialup ppp box getting a new public IP address,
+ which my laptop won't notice. **done**
## telehash