diff options
author | Joey Hess <joey@kitenet.net> | 2012-11-18 15:27:44 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2012-11-18 15:27:44 -0400 |
commit | 677aab525a7023642f4b2e9d96db3c3481e8f0b1 (patch) | |
tree | e6584a8f1663f364001ad452fe8d09b83fda11a4 | |
parent | cb2ec900ae8aa60b4ccf35adeb287823d976be07 (diff) |
better streaming while encrypting/decrypting
Both the directory and webdav special remotes used to have to buffer
the whole file contents before it could be decrypted, as they read
from chunks. Now the chunks are streamed through gpg with no buffering.
-rw-r--r-- | Creds.hs | 13 | ||||
-rw-r--r-- | Crypto.hs | 62 | ||||
-rw-r--r-- | Remote/Bup.hs | 5 | ||||
-rw-r--r-- | Remote/Directory.hs | 12 | ||||
-rw-r--r-- | Remote/Helper/Encryptable.hs | 15 | ||||
-rw-r--r-- | Remote/Hook.hs | 6 | ||||
-rw-r--r-- | Remote/Rsync.hs | 6 | ||||
-rw-r--r-- | Remote/S3.hs | 7 | ||||
-rw-r--r-- | Remote/WebDAV.hs | 16 | ||||
-rw-r--r-- | Utility/Gpg.hs | 18 | ||||
-rw-r--r-- | debian/changelog | 2 |
11 files changed, 83 insertions, 79 deletions
@@ -40,9 +40,9 @@ setRemoteCredPair c storage = go =<< getRemoteCredPair c storage mcipher <- remoteCipher c case (mcipher, credPairRemoteKey storage) of (Just cipher, Just key) | isTrustedCipher c -> do - s <- liftIO $ withEncryptedContent cipher - (return $ L.pack $ encodeCredPair creds) - (return . L.unpack) + s <- liftIO $ encrypt cipher + (feedBytes $ L.pack $ encodeCredPair creds) + (readBytes $ return . L.unpack) return $ M.insert key (toB64 s) c _ -> do writeCacheCredPair creds storage @@ -62,7 +62,9 @@ getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv mcipher <- remoteCipher c case (M.lookup key c, mcipher) of (Just enccreds, Just cipher) -> do - creds <- liftIO $ decrypt enccreds cipher + creds <- liftIO $ decrypt cipher + (feedBytes $ L.pack $ fromB64 enccreds) + (readBytes $ return . L.unpack) case decodeCredPair creds of Just credpair -> do writeCacheCredPair credpair storage @@ -70,9 +72,6 @@ getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv _ -> do error $ "bad " ++ key _ -> return Nothing Nothing -> return Nothing - decrypt enccreds cipher = withDecryptedContent cipher - (return $ L.pack $ fromB64 enccreds) - (return . L.unpack) {- Gets a CredPair from the environment. -} getEnvCredPair :: CredPairStorage -> IO (Maybe CredPair) @@ -18,10 +18,11 @@ module Crypto ( describeCipher, decryptCipher, encryptKey, - withEncryptedHandle, - withDecryptedHandle, - withEncryptedContent, - withDecryptedContent, + feedFile, + feedBytes, + readBytes, + encrypt, + decrypt, prop_hmacWithCipher_sane ) where @@ -90,10 +91,9 @@ describeCipher (EncryptedCipher _ (KeyIds ks)) = encryptCipher :: Cipher -> KeyIds -> IO StorableCipher encryptCipher (Cipher c) (KeyIds ks) = do let ks' = nub $ sort ks -- gpg complains about duplicate recipient keyids - encipher <- Gpg.pipeStrict (encrypt++recipients ks') c + encipher <- Gpg.pipeStrict ([ Params "--encrypt" ] ++ recipients ks') c return $ EncryptedCipher encipher (KeyIds ks') where - encrypt = [ Params "--encrypt" ] recipients l = force_recipients : concatMap (\k -> [Param "--recipient", Param k]) l -- Force gpg to only encrypt to the specified @@ -103,9 +103,7 @@ encryptCipher (Cipher c) (KeyIds ks) = do {- Decrypting an EncryptedCipher is expensive; the Cipher should be cached. -} decryptCipher :: StorableCipher -> IO Cipher decryptCipher (SharedCipher t) = return $ Cipher t -decryptCipher (EncryptedCipher t _) = Cipher <$> Gpg.pipeStrict decrypt t - where - decrypt = [ Param "--decrypt" ] +decryptCipher (EncryptedCipher t _) = Cipher <$> Gpg.pipeStrict [ Param "--decrypt" ] t {- Generates an encrypted form of a Key. The encryption does not need to be - reversable, nor does it need to be the same type of encryption used @@ -118,31 +116,27 @@ encryptKey c k = Key , keyMtime = Nothing -- to avoid leaking data } -{- Runs an action, passing it a handle from which it can - - stream encrypted content. -} -withEncryptedHandle :: Cipher -> IO L.ByteString -> (Handle -> IO a) -> IO a -withEncryptedHandle = Gpg.passphraseHandle [Params "--symmetric --force-mdc"] . cipherPassphrase - -{- Runs an action, passing it a handle from which it can - - stream decrypted content. -} -withDecryptedHandle :: Cipher -> IO L.ByteString -> (Handle -> IO a) -> IO a -withDecryptedHandle = Gpg.passphraseHandle [Param "--decrypt"] . cipherPassphrase - -{- Streams encrypted content to an action. -} -withEncryptedContent :: Cipher -> IO L.ByteString -> (L.ByteString -> IO a) -> IO a -withEncryptedContent = pass withEncryptedHandle - -{- Streams decrypted content to an action. -} -withDecryptedContent :: Cipher -> IO L.ByteString -> (L.ByteString -> IO a) -> IO a -withDecryptedContent = pass withDecryptedHandle - -pass - :: (Cipher -> IO L.ByteString -> (Handle -> IO a) -> IO a) - -> Cipher - -> IO L.ByteString - -> (L.ByteString -> IO a) - -> IO a -pass to n s a = to n s $ a <=< L.hGetContents +type Feeder = Handle -> IO () +type Reader a = Handle -> IO a + +feedFile :: FilePath -> Feeder +feedFile f h = L.hPut h =<< L.readFile f + +feedBytes :: L.ByteString -> Feeder +feedBytes = flip L.hPut + +readBytes :: (L.ByteString -> IO a) -> Reader a +readBytes a h = L.hGetContents h >>= a + +{- Runs a Feeder action, that generates content that is encrypted with the + - Cipher, and read by the Reader action. -} +encrypt :: Cipher -> Feeder -> Reader a -> IO a +encrypt = Gpg.feedRead [Params "--symmetric --force-mdc"] . cipherPassphrase + +{- Runs a Feeder action, that generates content that is decrypted with the + - Cipher, and read by the Reader action. -} +decrypt :: Cipher -> Feeder -> Reader a -> IO a +decrypt = Gpg.feedRead [Param "--decrypt"] . cipherPassphrase hmacWithCipher :: Cipher -> String -> String hmacWithCipher c = hmacWithCipher' (cipherHmac c) diff --git a/Remote/Bup.hs b/Remote/Bup.hs index f5bcc4f45..62db01a7b 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -125,7 +125,7 @@ storeEncrypted r buprepo (cipher, enck) k _p = do src <- inRepo $ gitAnnexLocation k params <- bupSplitParams r buprepo enck [] liftIO $ catchBoolIO $ - withEncryptedHandle cipher (L.readFile src) $ \h -> + encrypt cipher (feedFile src) $ \h -> pipeBup params (Just h) Nothing retrieve :: BupRepo -> Key -> AssociatedFile -> FilePath -> Annex Bool @@ -141,7 +141,8 @@ retrieveCheap _ _ _ = return False retrieveEncrypted :: BupRepo -> (Cipher, Key) -> Key -> FilePath -> Annex Bool retrieveEncrypted buprepo (cipher, enck) _ f = liftIO $ catchBoolIO $ withHandle StdoutHandle createProcessSuccess p $ \h -> do - withDecryptedContent cipher (L.hGetContents h) $ L.writeFile f + decrypt cipher (\toh -> L.hPut toh =<< L.hGetContents h) $ + readBytes $ L.writeFile f return True where params = bupParams "join" buprepo [Param $ bupRef enck] diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 794a8c468..0527270a1 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -118,13 +118,13 @@ storeEncrypted d chunksize (cipher, enck) k p = do src <- inRepo $ gitAnnexLocation k metered (Just p) k $ \meterupdate -> storeHelper d chunksize enck $ \dests -> - withEncryptedContent cipher (L.readFile src) $ \s -> + encrypt cipher (feedFile src) $ readBytes $ \b -> case chunksize of Nothing -> do let dest = Prelude.head dests - meteredWriteFile meterupdate dest s + meteredWriteFile meterupdate dest b return [dest] - Just _ -> storeSplit meterupdate chunksize dests s + Just _ -> storeSplit meterupdate chunksize dests b {- Splits a ByteString into chunks and writes to dests, obeying configured - chunk size (not to be confused with the L.ByteString chunk size). @@ -192,9 +192,11 @@ retrieveEncrypted :: FilePath -> ChunkSize -> (Cipher, Key) -> Key -> FilePath - retrieveEncrypted d chunksize (cipher, enck) k f = metered Nothing k $ \meterupdate -> liftIO $ withStoredFiles chunksize d enck $ \files -> catchBoolIO $ do - withDecryptedContent cipher (L.concat <$> mapM L.readFile files) $ - meteredWriteFile meterupdate f + decrypt cipher (feeder files) $ + readBytes $ meteredWriteFile meterupdate f return True + where + feeder files h = forM_ files $ \file -> L.hPut h =<< L.readFile file retrieveCheap :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool retrieveCheap _ (Just _) _ _ = return False -- no cheap retrieval for chunks diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index 12c7d37e9..a48ec813a 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -81,12 +81,11 @@ remoteCipher c = go $ extractCipher c cache <- Annex.getState Annex.ciphers case M.lookup encipher cache of Just cipher -> return $ Just cipher - Nothing -> decrypt encipher cache - decrypt encipher cache = do - showNote "gpg" - cipher <- liftIO $ decryptCipher encipher - Annex.changeState (\s -> s { Annex.ciphers = M.insert encipher cipher cache }) - return $ Just cipher + Nothing -> do + showNote "gpg" + cipher <- liftIO $ decryptCipher encipher + Annex.changeState (\s -> s { Annex.ciphers = M.insert encipher cipher cache }) + return $ Just cipher {- Checks if there is a trusted (non-shared) cipher. -} isTrustedCipher :: RemoteConfig -> Bool @@ -96,9 +95,9 @@ isTrustedCipher c = {- Gets encryption Cipher, and encrypted version of Key. -} cipherKey :: Maybe RemoteConfig -> Key -> Annex (Maybe (Cipher, Key)) cipherKey Nothing _ = return Nothing -cipherKey (Just c) k = maybe Nothing encrypt <$> remoteCipher c +cipherKey (Just c) k = maybe Nothing make <$> remoteCipher c where - encrypt ciphertext = Just (ciphertext, encryptKey ciphertext k) + make ciphertext = Just (ciphertext, encryptKey ciphertext k) {- Stores an StorableCipher in a remote's configuration. -} storeCipher :: RemoteConfig -> StorableCipher -> RemoteConfig diff --git a/Remote/Hook.hs b/Remote/Hook.hs index f9a143ccd..7173a5b80 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -108,7 +108,8 @@ store h k _f _p = do storeEncrypted :: String -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool storeEncrypted h (cipher, enck) k _p = withTmp enck $ \tmp -> do src <- inRepo $ gitAnnexLocation k - liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp + liftIO $ encrypt cipher (feedFile src) $ + readBytes $ L.writeFile tmp runHook h "store" enck (Just tmp) $ return True retrieve :: String -> Key -> AssociatedFile -> FilePath -> Annex Bool @@ -120,7 +121,8 @@ retrieveCheap _ _ _ = return False retrieveEncrypted :: String -> (Cipher, Key) -> Key -> FilePath -> Annex Bool retrieveEncrypted h (cipher, enck) _ f = withTmp enck $ \tmp -> runHook h "retrieve" enck (Just tmp) $ liftIO $ catchBoolIO $ do - withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f + decrypt cipher (feedFile tmp) $ + readBytes $ L.writeFile f return True remove :: String -> Key -> Annex Bool diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 1d5f2d28c..c48a9c14a 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -110,7 +110,8 @@ store o k _f p = rsyncSend o p k <=< inRepo $ gitAnnexLocation k storeEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool storeEncrypted o (cipher, enck) k p = withTmp enck $ \tmp -> do src <- inRepo $ gitAnnexLocation k - liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp + liftIO $ decrypt cipher (feedFile src) $ + readBytes $ L.writeFile tmp rsyncSend o p enck tmp retrieve :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> Annex Bool @@ -128,7 +129,8 @@ retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> FilePath -> Annex Bool retrieveEncrypted o (cipher, enck) _ f = withTmp enck $ \tmp -> do ifM (retrieve o enck undefined tmp) ( liftIO $ catchBoolIO $ do - withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f + decrypt cipher (feedFile tmp) $ + readBytes $ L.writeFile f return True , return False ) diff --git a/Remote/S3.hs b/Remote/S3.hs index 93620dfd0..54136f8a0 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -122,7 +122,8 @@ storeEncrypted r (cipher, enck) k _p = s3Action r False $ \(conn, bucket) -> -- (An alternative would be chunking to to a constant size.) withTmp enck $ \tmp -> do f <- inRepo $ gitAnnexLocation k - liftIO $ withEncryptedContent cipher (L.readFile f) $ L.writeFile tmp + liftIO $ encrypt cipher (feedFile f) $ + readBytes $ L.writeFile tmp res <- liftIO $ storeHelper (conn, bucket) r enck tmp s3Bool res @@ -162,8 +163,8 @@ retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> Annex Bool retrieveEncrypted r (cipher, enck) _ f = s3Action r False $ \(conn, bucket) -> do res <- liftIO $ getObject conn $ bucketKey r bucket enck case res of - Right o -> liftIO $ - withDecryptedContent cipher (return $ obj_data o) $ \content -> do + Right o -> liftIO $ decrypt cipher (feedBytes $ obj_data o) $ + readBytes $ \content -> do L.writeFile f content return True Left e -> s3Warning e diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index aab5e7efb..ea4800c9d 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -93,8 +93,8 @@ storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool storeEncrypted r (cipher, enck) k _p = davAction r False $ \(baseurl, user, pass) -> do let url = davLocation baseurl enck f <- inRepo $ gitAnnexLocation k - liftIO $ withEncryptedContent cipher (L.readFile f) $ - storeHelper r url user pass + liftIO $ encrypt cipher (feedFile f) $ + readBytes $ storeHelper r url user pass storeHelper :: Remote -> DavUrl -> DavUser -> DavPass -> L.ByteString -> IO Bool storeHelper r urlbase user pass b = catchBoolIO $ do @@ -133,18 +133,20 @@ retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> Annex Bool retrieveEncrypted r (cipher, enck) k d = metered Nothing k $ \meterupdate -> davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $ withStoredFiles r enck baseurl user pass onerr $ \urls -> do - withDecryptedContent cipher (L.concat <$> feeder user pass urls []) $ - meteredWriteFile meterupdate d + decrypt cipher (feeder user pass urls) $ + readBytes $ meteredWriteFile meterupdate d return True where onerr _ = return False - feeder _ _ [] c = return $ reverse c - feeder user pass (url:urls) c = do + feeder _ _ [] _ = noop + feeder user pass (url:urls) h = do mb <- davGetUrlContent url user pass case mb of Nothing -> throwIO "download failed" - Just b -> feeder user pass urls (b:c) + Just b -> do + L.hPut h b + feeder user pass urls h remove :: Remote -> Key -> Annex Bool remove r k = davAction r False $ \(baseurl, user, pass) -> liftIO $ do diff --git a/Utility/Gpg.hs b/Utility/Gpg.hs index b5cffb1c5..054e6ca17 100644 --- a/Utility/Gpg.hs +++ b/Utility/Gpg.hs @@ -7,7 +7,6 @@ module Utility.Gpg where -import qualified Data.ByteString.Lazy as L import System.Posix.Types import Control.Applicative import Control.Concurrent @@ -54,14 +53,15 @@ pipeStrict params input = do hClose to hGetContentsStrict from -{- Runs gpg with some parameters, first feeding it a passphrase via - - --passphrase-fd, then feeding it an input, and passing a handle - - to its output to an action. +{- Runs gpg with some parameters. First sends it a passphrase via + - --passphrase-fd. Then runs a feeder action that is passed a handle and + - should write to it all the data to input to gpg. Finally, runs + - a reader action that is passed a handle to gpg's output. - - Note that to avoid deadlock with the cleanup stage, - - the action must fully consume gpg's input before returning. -} -passphraseHandle :: [CommandParam] -> String -> IO L.ByteString -> (Handle -> IO a) -> IO a -passphraseHandle params passphrase a b = do + - the reader must fully consume gpg's input before returning. -} +feedRead :: [CommandParam] -> String -> (Handle -> IO ()) -> (Handle -> IO a) -> IO a +feedRead params passphrase feeder reader = do -- pipe the passphrase into gpg on a fd (frompipe, topipe) <- createPipe void $ forkIO $ do @@ -77,9 +77,9 @@ passphraseHandle params passphrase a b = do where go (to, from) = do void $ forkIO $ do - L.hPut to =<< a + feeder to hClose to - b from + reader from {- Finds gpg public keys matching some string. (Could be an email address, - a key id, or a name. -} diff --git a/debian/changelog b/debian/changelog index dbbb43333..b1547a813 100644 --- a/debian/changelog +++ b/debian/changelog @@ -7,6 +7,8 @@ git-annex (3.20121113) UNRELEASED; urgency=low added, including when new repository configurations are pushed in from remotes. * OSX: Fix RunAtLoad value in plist file. + * Getting a file from chunked directory special remotes no longer buffers + it all in memory. -- Joey Hess <joeyh@debian.org> Tue, 13 Nov 2012 13:17:07 -0400 |