From af777a1134bfd21cebff29dadcdf0d3f8ac05c48 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Jul 2014 20:10:14 -0400 Subject: optimise case of remote that retrieves FileContent, when chunks and encryption are not being used No need to read whole FileContent only to write it back out to a file in this case. Can just rename! Yay. Also indidentially, fixed an attempt to open a file for write that was already opened for write, which caused a crash and deadlock. --- Remote/Helper/Chunked.hs | 63 +++++++++++++++++----------------- Remote/Helper/ChunkedEncryptable.hs | 67 ++++++++++++++++++++++++++----------- Types/StoreRetrieve.hs | 4 +++ 3 files changed, 84 insertions(+), 50 deletions(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index b7522aa89..00c089e80 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -221,7 +221,7 @@ retrieveChunks -> Key -> FilePath -> MeterUpdate - -> (Handle -> Maybe MeterUpdate -> L.ByteString -> IO ()) + -> (Maybe Handle -> Maybe MeterUpdate -> ContentSource -> Annex Bool) -> Annex Bool retrieveChunks retriever u chunkconfig encryptor basek dest basep sink | noChunks chunkconfig = @@ -244,34 +244,37 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink firstavail _ [] = return False firstavail currsize ([]:ls) = firstavail currsize ls - firstavail currsize ((k:ks):ls) = do - let offset = resumeOffset currsize k - let p = maybe basep - (offsetMeterUpdate basep . toBytesProcessed) - offset - v <- tryNonAsyncAnnex $ - retriever (encryptor k) p $ \content -> - bracketIO (maybe opennew openresume offset) hClose $ \h -> do - tosink h p content - let sz = toBytesProcessed $ - fromMaybe 0 $ keyChunkSize k - getrest p h sz sz ks - `catchNonAsyncAnnex` giveup - case v of - Left e - | null ls -> giveup e - | otherwise -> firstavail currsize ls - Right r -> return r + firstavail currsize ((k:ks):ls) + | k == basek = getunchunked + `catchNonAsyncAnnex` (const $ firstavail currsize ls) + | otherwise = do + let offset = resumeOffset currsize k + let p = maybe basep + (offsetMeterUpdate basep . toBytesProcessed) + offset + v <- tryNonAsyncAnnex $ + retriever (encryptor k) p $ \content -> + bracketIO (maybe opennew openresume offset) hClose $ \h -> do + void $ tosink (Just h) p content + let sz = toBytesProcessed $ + fromMaybe 0 $ keyChunkSize k + getrest p h sz sz ks + `catchNonAsyncAnnex` giveup + case v of + Left e + | null ls -> giveup e + | otherwise -> firstavail currsize ls + Right r -> return r getrest _ _ _ _ [] = return True getrest p h sz bytesprocessed (k:ks) = do let p' = offsetMeterUpdate p bytesprocessed - retriever (encryptor k) p' $ tosink h p' - getrest p h sz (addBytesProcessed bytesprocessed sz) ks + ifM (retriever (encryptor k) p' $ tosink (Just h) p') + ( getrest p h sz (addBytesProcessed bytesprocessed sz) ks + , giveup "chunk retrieval failed" + ) - getunchunked = bracketIO opennew hClose $ \h -> do - retriever (encryptor basek) basep $ tosink h basep - return True + getunchunked = retriever (encryptor basek) basep $ tosink Nothing basep opennew = openBinaryFile dest WriteMode @@ -290,13 +293,11 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink - it is not responsible for updating progress (often it cannot). - Instead, the sink is passed a meter to update as it consumes - the ByteString. -} - tosink h p (ByteContent b) = liftIO $ do - sink h (Just p) b - return True - tosink h _ (FileContent f) = liftIO $ do - sink h Nothing =<< L.readFile f - nukeFile f - return True + tosink h p content = sink h p' content + where + p' + | isByteContent content = Just p + | otherwise = Nothing {- Can resume when the chunk's offset is at or before the end of - the dest file. -} diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs index 550a6934b..ca73802ba 100644 --- a/Remote/Helper/ChunkedEncryptable.hs +++ b/Remote/Helper/ChunkedEncryptable.hs @@ -18,9 +18,9 @@ module Remote.Helper.ChunkedEncryptable ( byteStorer, fileRetriever, byteRetriever, - chunkedEncryptableRemote, storeKeyDummy, retreiveKeyFileDummy, + chunkedEncryptableRemote, module X ) where @@ -36,6 +36,7 @@ import Annex.Content import Annex.Exception import qualified Data.ByteString.Lazy as L +import Control.Exception (bracket) -- Use when nothing needs to be done to prepare a helper. simplyPrepare :: helper -> Preparer helper @@ -78,6 +79,16 @@ withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a withBytes (ByteContent b) a = a b withBytes (FileContent f) a = a =<< liftIO (L.readFile f) +{- The base Remote that is provided to chunkedEncryptableRemote + - needs to have storeKey and retreiveKeyFile methods, but they are + - never actually used (since chunkedEncryptableRemote replaces + - them). Here are some dummy ones. + -} +storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool +storeKeyDummy _ _ _ = return False +retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool +retreiveKeyFileDummy _ _ _ _ = return False + -- Modifies a base Remote to support both chunking and encryption. chunkedEncryptableRemote :: RemoteConfig @@ -131,17 +142,8 @@ chunkedEncryptableRemote c preparestorer prepareretriever baser = encr where go (Just retriever) = metered (Just p) k $ \p' -> retrieveChunks retriever (uuid baser) chunkconfig - enck k dest p' sink + enck k dest p' (sink dest enc) go Nothing = return False - sink h mp b = do - let write = case mp of - Just p' -> meteredWrite p' h - Nothing -> L.hPut h - case enc of - Nothing -> write b - Just (cipher, _) -> - decrypt cipher (feedBytes b) $ - readBytes write enck = maybe id snd enc removeKeyGen k enc = removeChunks remover (uuid baser) chunkconfig enck k @@ -154,12 +156,39 @@ chunkedEncryptableRemote c preparestorer prepareretriever baser = encr enck = maybe id snd enc checker = hasKey baser -{- The base Remote that is provided to chunkedEncryptableRemote - - needs to have storeKey and retreiveKeyFile methods, but they are - - never actually used (since chunkedEncryptableRemote replaces - - them). Here are some dummy ones. +{- Sink callback for retrieveChunks. Stores the file content into the + - provided Handle, decrypting it first if necessary. + - + - If the remote did not store the content using chunks, no Handle + - will be provided, and it's up to us to open the destination file. + - + - Note that when neither chunking nor encryption is used, and the remote + - provides FileContent, that file only needs to be renamed + - into place. (And it may even already be in the right place..) -} -storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool -storeKeyDummy _ _ _ = return False -retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retreiveKeyFileDummy _ _ _ _ = return False +sink + :: FilePath + -> Maybe (Cipher, EncKey) + -> Maybe Handle + -> Maybe MeterUpdate + -> ContentSource + -> Annex Bool +sink dest enc mh mp content = do + case (enc, mh, content) of + (Nothing, Nothing, FileContent f) + | f == dest -> noop + | otherwise -> liftIO $ moveFile f dest + (Just (cipher, _), _, _) -> + withBytes content $ \b -> + decrypt cipher (feedBytes b) $ + readBytes write + (Nothing, _, _) -> withBytes content write + return True + where + write b = case mh of + Just h -> liftIO $ b `streamto` h + Nothing -> liftIO $ bracket opendest hClose (b `streamto`) + streamto b h = case mp of + Just p -> meteredWrite p h b + Nothing -> L.hPut h b + opendest = openBinaryFile dest WriteMode diff --git a/Types/StoreRetrieve.hs b/Types/StoreRetrieve.hs index 0ee2fd501..33f66efb1 100644 --- a/Types/StoreRetrieve.hs +++ b/Types/StoreRetrieve.hs @@ -23,6 +23,10 @@ data ContentSource = FileContent FilePath | ByteContent L.ByteString +isByteContent :: ContentSource -> Bool +isByteContent (ByteContent _) = True +isByteContent (FileContent _) = False + -- Action that stores a Key's content on a remote. -- Can throw exceptions. type Storer = Key -> ContentSource -> MeterUpdate -> Annex Bool -- cgit v1.2.3