diff options
author | Joey Hess <joey@kitenet.net> | 2014-07-27 01:18:38 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2014-07-27 01:18:38 -0400 |
commit | 0a922be11970dc2e3f792301d828f1d187b7fc94 (patch) | |
tree | 6f455f668ffbeffca43567b25723cef5cc188f26 | |
parent | c46eba94f69151ded37988efd7e3375d327faef3 (diff) |
faster storeChunks
No need to process each L.ByteString chunk, instead ask it to split.
Doesn't seem to have really sped things up much, but it also made the code
simpler.
Note that this does (and already did) buffer in memory. It seems that only
the directory special remote could take advantage of streaming chunks to
files w/o buffering, so probably won't add an interface to allow for that.
-rw-r--r-- | Remote/Helper/Chunked.hs | 54 |
1 files changed, 24 insertions, 30 deletions
diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 3eab0947a..65d5892c3 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -26,7 +26,6 @@ import Utility.Metered import Crypto (EncKey) import qualified Data.ByteString.Lazy as L -import qualified Data.ByteString as S import qualified Data.Map as M data ChunkConfig @@ -70,6 +69,14 @@ numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream - the storer action, along with a corresponding chunk key and a - progress meter update callback. - + - This buffers each chunk in memory, so can use a lot of memory + - with a large ChunkSize. + - More optimal versions of this can be written, that rely + - on L.toChunks to split the lazy bytestring into chunks (typically + - smaller than the ChunkSize), and eg, write those chunks to a Handle. + - But this is the best that can be done with the storer interface that + - writes a whole L.ByteString at a time. + - - This action may be called on a chunked key. It will simply store it. -} storeChunks @@ -90,39 +97,26 @@ storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate -> _ -> liftIO $ storer k b meterupdate gochunks :: MeterUpdate -> ChunkSize -> L.ByteString -> ChunkKeyStream -> Annex Bool - gochunks meterupdate chunksize lb = - loop zeroBytesProcessed chunksize (L.toChunks lb) [] + gochunks meterupdate chunksize = loop zeroBytesProcessed . splitchunk where - loop bytesprocessed sz [] c chunkkeys - -- Always store at least one chunk, - -- even for empty content. - | not (null c) || numchunks == 0 = - storechunk bytesprocessed sz [] c chunkkeys - -- Once all chunks are successfully stored, - -- update the chunk log. - | otherwise = do + splitchunk = L.splitAt chunksize + + loop bytesprocessed (chunk, bs) chunkkeys + | L.null chunk && numchunks > 0 = do + -- Once all chunks are successfully + -- stored, update the chunk log. chunksStored u k chunksize numchunks return True + | otherwise = do + let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys + ifM (liftIO $ storer chunkkey chunk meterupdate') + ( do + let bytesprocessed' = addBytesProcessed bytesprocessed (L.length chunk) + loop bytesprocessed' (splitchunk bs) chunkkeys' + , return False + ) where - numchunks = numChunks chunkkeys - loop bytesprocessed sz (b:bs) c chunkkeys - | s <= sz || sz == chunksize = - loop bytesprocessed sz' bs (b:c) chunkkeys - | otherwise = - storechunk bytesprocessed sz' bs (b:c) chunkkeys - where - s = fromIntegral (S.length b) - sz' = sz - s - - storechunk bytesprocessed sz bs c chunkkeys = do - let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys - ifM (liftIO $ storer chunkkey (L.fromChunks $ reverse c) meterupdate') - ( do - let bytesprocessed' = addBytesProcessed bytesprocessed (chunksize - sz) - loop bytesprocessed' chunksize bs [] chunkkeys' - , return False - ) - where + numchunks = numChunks chunkkeys {- The MeterUpdate that is passed to the action - storing a chunk is offset, so that it reflects - the total bytes that have already been stored |