summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-04-19 15:26:50 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-04-19 15:27:03 -0400
commit5985acdfad8a6791f0b2fc54a1e116cee9c12479 (patch)
tree79bb1aa8ff55e4c53dc5ca08ea511aa6f9bd4232
parentb1274b637863ebb4e14d39ca2cf00a27c9d1f142 (diff)
bup: Avoid memory leak when transferring encrypted data.
This was a most surprising leak. It occurred in the process that is forked off to feed data to gpg. That process was passed a lazy ByteString of input, and ghc seemed to not GC the ByteString as it was lazily read and consumed, so memory slowly leaked as the file was read and passed through gpg to bup. To fix it, I simply changed the feeder to take an IO action that returns the lazy bytestring, and fed the result directly to hPut. AFAICS, this should change nothing WRT buffering. But somehow it makes ghc's GC do the right thing. Probably I triggered some weakness in ghc's GC (version 6.12.1). (Note that S3 still has this leak, and others too. Fixing it will involve another dance with the type system.) Update: One theory I have is that this has something to do with the forking of the feeder process. Perhaps, when the ByteString is produced before the fork, ghc decides it need to hold a pointer to the start of it, for some reason -- maybe it doesn't realize that it is only used in the forked process.
-rw-r--r--Crypto.hs20
-rw-r--r--Remote/Bup.hs6
-rw-r--r--Remote/Directory.hs6
-rw-r--r--Remote/S3real.hs4
-rw-r--r--debian/changelog3
5 files changed, 18 insertions, 21 deletions
diff --git a/Crypto.hs b/Crypto.hs
index 41f6b999b..478d83761 100644
--- a/Crypto.hs
+++ b/Crypto.hs
@@ -153,24 +153,24 @@ encryptKey c k =
{- Runs an action, passing it a handle from which it can
- stream encrypted content. -}
-withEncryptedHandle :: Cipher -> L.ByteString -> (Handle -> IO a) -> IO a
+withEncryptedHandle :: Cipher -> (IO L.ByteString) -> (Handle -> IO a) -> IO a
withEncryptedHandle = gpgCipherHandle [Params "--symmetric --force-mdc"]
{- Runs an action, passing it a handle from which it can
- stream decrypted content. -}
-withDecryptedHandle :: Cipher -> L.ByteString -> (Handle -> IO a) -> IO a
+withDecryptedHandle :: Cipher -> (IO L.ByteString) -> (Handle -> IO a) -> IO a
withDecryptedHandle = gpgCipherHandle [Param "--decrypt"]
{- Streams encrypted content to an action. -}
-withEncryptedContent :: Cipher -> L.ByteString -> (L.ByteString -> IO a) -> IO a
+withEncryptedContent :: Cipher -> (IO L.ByteString) -> (L.ByteString -> IO a) -> IO a
withEncryptedContent = pass withEncryptedHandle
{- Streams decrypted content to an action. -}
-withDecryptedContent :: Cipher -> L.ByteString -> (L.ByteString -> IO a) -> IO a
+withDecryptedContent :: Cipher -> (IO L.ByteString) -> (L.ByteString -> IO a) -> IO a
withDecryptedContent = pass withDecryptedHandle
-pass :: (Cipher -> L.ByteString -> (Handle -> IO a) -> IO a)
- -> Cipher -> L.ByteString -> (L.ByteString -> IO a) -> IO a
+pass :: (Cipher -> (IO L.ByteString) -> (Handle -> IO a) -> IO a)
+ -> Cipher -> (IO L.ByteString) -> (L.ByteString -> IO a) -> IO a
pass to c i a = to c i $ \h -> a =<< L.hGetContents h
gpgParams :: [CommandParam] -> IO [String]
@@ -203,8 +203,8 @@ gpgPipeStrict params input = do
-
- Note that to avoid deadlock with the cleanup stage,
- the action must fully consume gpg's input before returning. -}
-gpgCipherHandle :: [CommandParam] -> Cipher -> L.ByteString -> (Handle -> IO a) -> IO a
-gpgCipherHandle params c input a = do
+gpgCipherHandle :: [CommandParam] -> Cipher -> (IO L.ByteString) -> (Handle -> IO a) -> IO a
+gpgCipherHandle params c a b = do
-- pipe the passphrase into gpg on a fd
(frompipe, topipe) <- createPipe
_ <- forkIO $ do
@@ -217,11 +217,11 @@ gpgCipherHandle params c input a = do
params' <- gpgParams $ passphrase ++ params
(pid, fromh, toh) <- hPipeBoth "gpg" params'
_ <- forkProcess $ do
- L.hPut toh input
+ L.hPut toh =<< a
hClose toh
exitSuccess
hClose toh
- ret <- a fromh
+ ret <- b fromh
-- cleanup
forceSuccess pid
diff --git a/Remote/Bup.hs b/Remote/Bup.hs
index 16e1bbdcb..6ae002c3b 100644
--- a/Remote/Bup.hs
+++ b/Remote/Bup.hs
@@ -126,8 +126,7 @@ storeEncrypted r buprepo (cipher, enck) k = do
let src = gitAnnexLocation g k
params <- bupSplitParams r buprepo enck (Param "-")
liftIO $ catchBool $ do
- content <- L.readFile src
- withEncryptedHandle cipher content $ \h -> do
+ withEncryptedHandle cipher (L.readFile src) $ \h -> do
pipeBup params (Just h) Nothing
retrieve :: BupRepo -> Key -> FilePath -> Annex Bool
@@ -142,8 +141,7 @@ retrieveEncrypted buprepo (cipher, enck) f = do
let params = bupParams "join" buprepo [Param $ show enck]
liftIO $ catchBool $ do
(pid, h) <- hPipeFrom "bup" $ toCommand params
- content <- L.hGetContents h
- withDecryptedContent cipher content $ L.writeFile f
+ withDecryptedContent cipher (L.hGetContents h) $ L.writeFile f
forceSuccess pid
return True
diff --git a/Remote/Directory.hs b/Remote/Directory.hs
index d9bee80c3..c680d6121 100644
--- a/Remote/Directory.hs
+++ b/Remote/Directory.hs
@@ -92,8 +92,7 @@ storeEncrypted d (cipher, enck) k = do
liftIO $ catchBool $ storeHelper dest $ encrypt src dest
where
encrypt src dest = do
- content <- L.readFile src
- withEncryptedContent cipher content $ L.writeFile dest
+ withEncryptedContent cipher (L.readFile src) $ L.writeFile dest
return True
storeHelper :: FilePath -> IO Bool -> IO Bool
@@ -113,8 +112,7 @@ retrieve d k f = liftIO $ copyFile (dirKey d k) f
retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted d (cipher, enck) f =
liftIO $ catchBool $ do
- content <- L.readFile (dirKey d enck)
- withDecryptedContent cipher content $ L.writeFile f
+ withDecryptedContent cipher (L.readFile (dirKey d enck)) $ L.writeFile f
return True
remove :: FilePath -> Key -> Annex Bool
diff --git a/Remote/S3real.hs b/Remote/S3real.hs
index 5d8435932..f40deaf17 100644
--- a/Remote/S3real.hs
+++ b/Remote/S3real.hs
@@ -106,7 +106,7 @@ store r k = s3Action r False $ \(conn, bucket) -> do
storeEncrypted :: Remote Annex -> (Cipher, Key) -> Key -> Annex Bool
storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) -> do
content <- lazyKeyContent k
- res <- liftIO $ withEncryptedContent cipher content $ \s -> do
+ res <- liftIO $ withEncryptedContent cipher (return content) $ \s -> do
storeHelper (conn, bucket) r enck s
s3Bool res
@@ -139,7 +139,7 @@ retrieveEncrypted r (cipher, enck) f = s3Action r False $ \(conn, bucket) -> do
res <- liftIO $ getObject conn $ bucketKey bucket enck L.empty
case res of
Right o -> liftIO $
- withDecryptedContent cipher (obj_data o) $ \content -> do
+ withDecryptedContent cipher (return $ obj_data o) $ \content -> do
L.writeFile f content
return True
Left e -> s3Warning e
diff --git a/debian/changelog b/debian/changelog
index 60ccace7a..4e9ea441d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,7 +3,8 @@ git-annex (0.20110418) UNRELEASED; urgency=low
* Don't run gpg in batch mode, so it can prompt for passphrase when
there is no agent.
* Add missing build dep on dataenc.
- * Fix stalls in S3 when transferring encrypted data.
+ * S3: Fix stalls when transferring encrypted data.
+ * bup: Avoid memory leak when transferring encrypted data.
-- Joey Hess <joeyh@debian.org> Sun, 17 Apr 2011 14:29:49 -0400