summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2012-11-18 15:27:44 -0400
committerGravatar Joey Hess <joey@kitenet.net>2012-11-18 15:27:44 -0400
commit677aab525a7023642f4b2e9d96db3c3481e8f0b1 (patch)
treee6584a8f1663f364001ad452fe8d09b83fda11a4
parentcb2ec900ae8aa60b4ccf35adeb287823d976be07 (diff)
better streaming while encrypting/decrypting
Both the directory and webdav special remotes used to have to buffer the whole file contents before it could be decrypted, as they read from chunks. Now the chunks are streamed through gpg with no buffering.
-rw-r--r--Creds.hs13
-rw-r--r--Crypto.hs62
-rw-r--r--Remote/Bup.hs5
-rw-r--r--Remote/Directory.hs12
-rw-r--r--Remote/Helper/Encryptable.hs15
-rw-r--r--Remote/Hook.hs6
-rw-r--r--Remote/Rsync.hs6
-rw-r--r--Remote/S3.hs7
-rw-r--r--Remote/WebDAV.hs16
-rw-r--r--Utility/Gpg.hs18
-rw-r--r--debian/changelog2
11 files changed, 83 insertions, 79 deletions
diff --git a/Creds.hs b/Creds.hs
index b907073f5..0c69fc7a5 100644
--- a/Creds.hs
+++ b/Creds.hs
@@ -40,9 +40,9 @@ setRemoteCredPair c storage = go =<< getRemoteCredPair c storage
mcipher <- remoteCipher c
case (mcipher, credPairRemoteKey storage) of
(Just cipher, Just key) | isTrustedCipher c -> do
- s <- liftIO $ withEncryptedContent cipher
- (return $ L.pack $ encodeCredPair creds)
- (return . L.unpack)
+ s <- liftIO $ encrypt cipher
+ (feedBytes $ L.pack $ encodeCredPair creds)
+ (readBytes $ return . L.unpack)
return $ M.insert key (toB64 s) c
_ -> do
writeCacheCredPair creds storage
@@ -62,7 +62,9 @@ getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv
mcipher <- remoteCipher c
case (M.lookup key c, mcipher) of
(Just enccreds, Just cipher) -> do
- creds <- liftIO $ decrypt enccreds cipher
+ creds <- liftIO $ decrypt cipher
+ (feedBytes $ L.pack $ fromB64 enccreds)
+ (readBytes $ return . L.unpack)
case decodeCredPair creds of
Just credpair -> do
writeCacheCredPair credpair storage
@@ -70,9 +72,6 @@ getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv
_ -> do error $ "bad " ++ key
_ -> return Nothing
Nothing -> return Nothing
- decrypt enccreds cipher = withDecryptedContent cipher
- (return $ L.pack $ fromB64 enccreds)
- (return . L.unpack)
{- Gets a CredPair from the environment. -}
getEnvCredPair :: CredPairStorage -> IO (Maybe CredPair)
diff --git a/Crypto.hs b/Crypto.hs
index 071fb7a25..fe6c6d5cb 100644
--- a/Crypto.hs
+++ b/Crypto.hs
@@ -18,10 +18,11 @@ module Crypto (
describeCipher,
decryptCipher,
encryptKey,
- withEncryptedHandle,
- withDecryptedHandle,
- withEncryptedContent,
- withDecryptedContent,
+ feedFile,
+ feedBytes,
+ readBytes,
+ encrypt,
+ decrypt,
prop_hmacWithCipher_sane
) where
@@ -90,10 +91,9 @@ describeCipher (EncryptedCipher _ (KeyIds ks)) =
encryptCipher :: Cipher -> KeyIds -> IO StorableCipher
encryptCipher (Cipher c) (KeyIds ks) = do
let ks' = nub $ sort ks -- gpg complains about duplicate recipient keyids
- encipher <- Gpg.pipeStrict (encrypt++recipients ks') c
+ encipher <- Gpg.pipeStrict ([ Params "--encrypt" ] ++ recipients ks') c
return $ EncryptedCipher encipher (KeyIds ks')
where
- encrypt = [ Params "--encrypt" ]
recipients l = force_recipients :
concatMap (\k -> [Param "--recipient", Param k]) l
-- Force gpg to only encrypt to the specified
@@ -103,9 +103,7 @@ encryptCipher (Cipher c) (KeyIds ks) = do
{- Decrypting an EncryptedCipher is expensive; the Cipher should be cached. -}
decryptCipher :: StorableCipher -> IO Cipher
decryptCipher (SharedCipher t) = return $ Cipher t
-decryptCipher (EncryptedCipher t _) = Cipher <$> Gpg.pipeStrict decrypt t
- where
- decrypt = [ Param "--decrypt" ]
+decryptCipher (EncryptedCipher t _) = Cipher <$> Gpg.pipeStrict [ Param "--decrypt" ] t
{- Generates an encrypted form of a Key. The encryption does not need to be
- reversable, nor does it need to be the same type of encryption used
@@ -118,31 +116,27 @@ encryptKey c k = Key
, keyMtime = Nothing -- to avoid leaking data
}
-{- Runs an action, passing it a handle from which it can
- - stream encrypted content. -}
-withEncryptedHandle :: Cipher -> IO L.ByteString -> (Handle -> IO a) -> IO a
-withEncryptedHandle = Gpg.passphraseHandle [Params "--symmetric --force-mdc"] . cipherPassphrase
-
-{- Runs an action, passing it a handle from which it can
- - stream decrypted content. -}
-withDecryptedHandle :: Cipher -> IO L.ByteString -> (Handle -> IO a) -> IO a
-withDecryptedHandle = Gpg.passphraseHandle [Param "--decrypt"] . cipherPassphrase
-
-{- Streams encrypted content to an action. -}
-withEncryptedContent :: Cipher -> IO L.ByteString -> (L.ByteString -> IO a) -> IO a
-withEncryptedContent = pass withEncryptedHandle
-
-{- Streams decrypted content to an action. -}
-withDecryptedContent :: Cipher -> IO L.ByteString -> (L.ByteString -> IO a) -> IO a
-withDecryptedContent = pass withDecryptedHandle
-
-pass
- :: (Cipher -> IO L.ByteString -> (Handle -> IO a) -> IO a)
- -> Cipher
- -> IO L.ByteString
- -> (L.ByteString -> IO a)
- -> IO a
-pass to n s a = to n s $ a <=< L.hGetContents
+type Feeder = Handle -> IO ()
+type Reader a = Handle -> IO a
+
+feedFile :: FilePath -> Feeder
+feedFile f h = L.hPut h =<< L.readFile f
+
+feedBytes :: L.ByteString -> Feeder
+feedBytes = flip L.hPut
+
+readBytes :: (L.ByteString -> IO a) -> Reader a
+readBytes a h = L.hGetContents h >>= a
+
+{- Runs a Feeder action, that generates content that is encrypted with the
+ - Cipher, and read by the Reader action. -}
+encrypt :: Cipher -> Feeder -> Reader a -> IO a
+encrypt = Gpg.feedRead [Params "--symmetric --force-mdc"] . cipherPassphrase
+
+{- Runs a Feeder action, that generates content that is decrypted with the
+ - Cipher, and read by the Reader action. -}
+decrypt :: Cipher -> Feeder -> Reader a -> IO a
+decrypt = Gpg.feedRead [Param "--decrypt"] . cipherPassphrase
hmacWithCipher :: Cipher -> String -> String
hmacWithCipher c = hmacWithCipher' (cipherHmac c)
diff --git a/Remote/Bup.hs b/Remote/Bup.hs
index f5bcc4f45..62db01a7b 100644
--- a/Remote/Bup.hs
+++ b/Remote/Bup.hs
@@ -125,7 +125,7 @@ storeEncrypted r buprepo (cipher, enck) k _p = do
src <- inRepo $ gitAnnexLocation k
params <- bupSplitParams r buprepo enck []
liftIO $ catchBoolIO $
- withEncryptedHandle cipher (L.readFile src) $ \h ->
+ encrypt cipher (feedFile src) $ \h ->
pipeBup params (Just h) Nothing
retrieve :: BupRepo -> Key -> AssociatedFile -> FilePath -> Annex Bool
@@ -141,7 +141,8 @@ retrieveCheap _ _ _ = return False
retrieveEncrypted :: BupRepo -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted buprepo (cipher, enck) _ f = liftIO $ catchBoolIO $
withHandle StdoutHandle createProcessSuccess p $ \h -> do
- withDecryptedContent cipher (L.hGetContents h) $ L.writeFile f
+ decrypt cipher (\toh -> L.hPut toh =<< L.hGetContents h) $
+ readBytes $ L.writeFile f
return True
where
params = bupParams "join" buprepo [Param $ bupRef enck]
diff --git a/Remote/Directory.hs b/Remote/Directory.hs
index 794a8c468..0527270a1 100644
--- a/Remote/Directory.hs
+++ b/Remote/Directory.hs
@@ -118,13 +118,13 @@ storeEncrypted d chunksize (cipher, enck) k p = do
src <- inRepo $ gitAnnexLocation k
metered (Just p) k $ \meterupdate ->
storeHelper d chunksize enck $ \dests ->
- withEncryptedContent cipher (L.readFile src) $ \s ->
+ encrypt cipher (feedFile src) $ readBytes $ \b ->
case chunksize of
Nothing -> do
let dest = Prelude.head dests
- meteredWriteFile meterupdate dest s
+ meteredWriteFile meterupdate dest b
return [dest]
- Just _ -> storeSplit meterupdate chunksize dests s
+ Just _ -> storeSplit meterupdate chunksize dests b
{- Splits a ByteString into chunks and writes to dests, obeying configured
- chunk size (not to be confused with the L.ByteString chunk size).
@@ -192,9 +192,11 @@ retrieveEncrypted :: FilePath -> ChunkSize -> (Cipher, Key) -> Key -> FilePath -
retrieveEncrypted d chunksize (cipher, enck) k f = metered Nothing k $ \meterupdate ->
liftIO $ withStoredFiles chunksize d enck $ \files ->
catchBoolIO $ do
- withDecryptedContent cipher (L.concat <$> mapM L.readFile files) $
- meteredWriteFile meterupdate f
+ decrypt cipher (feeder files) $
+ readBytes $ meteredWriteFile meterupdate f
return True
+ where
+ feeder files h = forM_ files $ \file -> L.hPut h =<< L.readFile file
retrieveCheap :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool
retrieveCheap _ (Just _) _ _ = return False -- no cheap retrieval for chunks
diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs
index 12c7d37e9..a48ec813a 100644
--- a/Remote/Helper/Encryptable.hs
+++ b/Remote/Helper/Encryptable.hs
@@ -81,12 +81,11 @@ remoteCipher c = go $ extractCipher c
cache <- Annex.getState Annex.ciphers
case M.lookup encipher cache of
Just cipher -> return $ Just cipher
- Nothing -> decrypt encipher cache
- decrypt encipher cache = do
- showNote "gpg"
- cipher <- liftIO $ decryptCipher encipher
- Annex.changeState (\s -> s { Annex.ciphers = M.insert encipher cipher cache })
- return $ Just cipher
+ Nothing -> do
+ showNote "gpg"
+ cipher <- liftIO $ decryptCipher encipher
+ Annex.changeState (\s -> s { Annex.ciphers = M.insert encipher cipher cache })
+ return $ Just cipher
{- Checks if there is a trusted (non-shared) cipher. -}
isTrustedCipher :: RemoteConfig -> Bool
@@ -96,9 +95,9 @@ isTrustedCipher c =
{- Gets encryption Cipher, and encrypted version of Key. -}
cipherKey :: Maybe RemoteConfig -> Key -> Annex (Maybe (Cipher, Key))
cipherKey Nothing _ = return Nothing
-cipherKey (Just c) k = maybe Nothing encrypt <$> remoteCipher c
+cipherKey (Just c) k = maybe Nothing make <$> remoteCipher c
where
- encrypt ciphertext = Just (ciphertext, encryptKey ciphertext k)
+ make ciphertext = Just (ciphertext, encryptKey ciphertext k)
{- Stores an StorableCipher in a remote's configuration. -}
storeCipher :: RemoteConfig -> StorableCipher -> RemoteConfig
diff --git a/Remote/Hook.hs b/Remote/Hook.hs
index f9a143ccd..7173a5b80 100644
--- a/Remote/Hook.hs
+++ b/Remote/Hook.hs
@@ -108,7 +108,8 @@ store h k _f _p = do
storeEncrypted :: String -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
storeEncrypted h (cipher, enck) k _p = withTmp enck $ \tmp -> do
src <- inRepo $ gitAnnexLocation k
- liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp
+ liftIO $ encrypt cipher (feedFile src) $
+ readBytes $ L.writeFile tmp
runHook h "store" enck (Just tmp) $ return True
retrieve :: String -> Key -> AssociatedFile -> FilePath -> Annex Bool
@@ -120,7 +121,8 @@ retrieveCheap _ _ _ = return False
retrieveEncrypted :: String -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted h (cipher, enck) _ f = withTmp enck $ \tmp ->
runHook h "retrieve" enck (Just tmp) $ liftIO $ catchBoolIO $ do
- withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f
+ decrypt cipher (feedFile tmp) $
+ readBytes $ L.writeFile f
return True
remove :: String -> Key -> Annex Bool
diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs
index 1d5f2d28c..c48a9c14a 100644
--- a/Remote/Rsync.hs
+++ b/Remote/Rsync.hs
@@ -110,7 +110,8 @@ store o k _f p = rsyncSend o p k <=< inRepo $ gitAnnexLocation k
storeEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
storeEncrypted o (cipher, enck) k p = withTmp enck $ \tmp -> do
src <- inRepo $ gitAnnexLocation k
- liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp
+ liftIO $ decrypt cipher (feedFile src) $
+ readBytes $ L.writeFile tmp
rsyncSend o p enck tmp
retrieve :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> Annex Bool
@@ -128,7 +129,8 @@ retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted o (cipher, enck) _ f = withTmp enck $ \tmp -> do
ifM (retrieve o enck undefined tmp)
( liftIO $ catchBoolIO $ do
- withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f
+ decrypt cipher (feedFile tmp) $
+ readBytes $ L.writeFile f
return True
, return False
)
diff --git a/Remote/S3.hs b/Remote/S3.hs
index 93620dfd0..54136f8a0 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -122,7 +122,8 @@ storeEncrypted r (cipher, enck) k _p = s3Action r False $ \(conn, bucket) ->
-- (An alternative would be chunking to to a constant size.)
withTmp enck $ \tmp -> do
f <- inRepo $ gitAnnexLocation k
- liftIO $ withEncryptedContent cipher (L.readFile f) $ L.writeFile tmp
+ liftIO $ encrypt cipher (feedFile f) $
+ readBytes $ L.writeFile tmp
res <- liftIO $ storeHelper (conn, bucket) r enck tmp
s3Bool res
@@ -162,8 +163,8 @@ retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted r (cipher, enck) _ f = s3Action r False $ \(conn, bucket) -> do
res <- liftIO $ getObject conn $ bucketKey r bucket enck
case res of
- Right o -> liftIO $
- withDecryptedContent cipher (return $ obj_data o) $ \content -> do
+ Right o -> liftIO $ decrypt cipher (feedBytes $ obj_data o) $
+ readBytes $ \content -> do
L.writeFile f content
return True
Left e -> s3Warning e
diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs
index aab5e7efb..ea4800c9d 100644
--- a/Remote/WebDAV.hs
+++ b/Remote/WebDAV.hs
@@ -93,8 +93,8 @@ storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
storeEncrypted r (cipher, enck) k _p = davAction r False $ \(baseurl, user, pass) -> do
let url = davLocation baseurl enck
f <- inRepo $ gitAnnexLocation k
- liftIO $ withEncryptedContent cipher (L.readFile f) $
- storeHelper r url user pass
+ liftIO $ encrypt cipher (feedFile f) $
+ readBytes $ storeHelper r url user pass
storeHelper :: Remote -> DavUrl -> DavUser -> DavPass -> L.ByteString -> IO Bool
storeHelper r urlbase user pass b = catchBoolIO $ do
@@ -133,18 +133,20 @@ retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted r (cipher, enck) k d = metered Nothing k $ \meterupdate ->
davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $
withStoredFiles r enck baseurl user pass onerr $ \urls -> do
- withDecryptedContent cipher (L.concat <$> feeder user pass urls []) $
- meteredWriteFile meterupdate d
+ decrypt cipher (feeder user pass urls) $
+ readBytes $ meteredWriteFile meterupdate d
return True
where
onerr _ = return False
- feeder _ _ [] c = return $ reverse c
- feeder user pass (url:urls) c = do
+ feeder _ _ [] _ = noop
+ feeder user pass (url:urls) h = do
mb <- davGetUrlContent url user pass
case mb of
Nothing -> throwIO "download failed"
- Just b -> feeder user pass urls (b:c)
+ Just b -> do
+ L.hPut h b
+ feeder user pass urls h
remove :: Remote -> Key -> Annex Bool
remove r k = davAction r False $ \(baseurl, user, pass) -> liftIO $ do
diff --git a/Utility/Gpg.hs b/Utility/Gpg.hs
index b5cffb1c5..054e6ca17 100644
--- a/Utility/Gpg.hs
+++ b/Utility/Gpg.hs
@@ -7,7 +7,6 @@
module Utility.Gpg where
-import qualified Data.ByteString.Lazy as L
import System.Posix.Types
import Control.Applicative
import Control.Concurrent
@@ -54,14 +53,15 @@ pipeStrict params input = do
hClose to
hGetContentsStrict from
-{- Runs gpg with some parameters, first feeding it a passphrase via
- - --passphrase-fd, then feeding it an input, and passing a handle
- - to its output to an action.
+{- Runs gpg with some parameters. First sends it a passphrase via
+ - --passphrase-fd. Then runs a feeder action that is passed a handle and
+ - should write to it all the data to input to gpg. Finally, runs
+ - a reader action that is passed a handle to gpg's output.
-
- Note that to avoid deadlock with the cleanup stage,
- - the action must fully consume gpg's input before returning. -}
-passphraseHandle :: [CommandParam] -> String -> IO L.ByteString -> (Handle -> IO a) -> IO a
-passphraseHandle params passphrase a b = do
+ - the reader must fully consume gpg's input before returning. -}
+feedRead :: [CommandParam] -> String -> (Handle -> IO ()) -> (Handle -> IO a) -> IO a
+feedRead params passphrase feeder reader = do
-- pipe the passphrase into gpg on a fd
(frompipe, topipe) <- createPipe
void $ forkIO $ do
@@ -77,9 +77,9 @@ passphraseHandle params passphrase a b = do
where
go (to, from) = do
void $ forkIO $ do
- L.hPut to =<< a
+ feeder to
hClose to
- b from
+ reader from
{- Finds gpg public keys matching some string. (Could be an email address,
- a key id, or a name. -}
diff --git a/debian/changelog b/debian/changelog
index dbbb43333..b1547a813 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -7,6 +7,8 @@ git-annex (3.20121113) UNRELEASED; urgency=low
added, including when new repository configurations are pushed in from
remotes.
* OSX: Fix RunAtLoad value in plist file.
+ * Getting a file from chunked directory special remotes no longer buffers
+ it all in memory.
-- Joey Hess <joeyh@debian.org> Tue, 13 Nov 2012 13:17:07 -0400