diff options
author | Joey Hess <joey@kitenet.net> | 2012-11-16 17:58:08 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2012-11-16 17:58:08 -0400 |
commit | 9fc8fc98a9279b822e5c0104fee8dab00debda90 (patch) | |
tree | 891e5bd35975a312bff502fb0afa5369a32a18cc /Remote/Helper/Chunked.hs | |
parent | 54399a70122023a5279566080f8ba82f889bccf8 (diff) |
generic chunked content helper
However, directory still uses its optimzed chunked file writer, as it uses
less memory than the generic one in the helper.
Diffstat (limited to 'Remote/Helper/Chunked.hs')
-rw-r--r-- | Remote/Helper/Chunked.hs | 121 |
1 files changed, 121 insertions, 0 deletions
diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs new file mode 100644 index 000000000..59117eaca --- /dev/null +++ b/Remote/Helper/Chunked.hs @@ -0,0 +1,121 @@ +{- git-annex chunked remotes + - + - Copyright 2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Remote.Helper.Chunked where + +import Common.Annex +import Utility.DataUnits +import Types.Remote + +import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L +import Data.Int +import qualified Control.Exception as E + +type ChunkSize = Maybe Int64 + +{- Gets a remote's configured chunk size. -} +chunkSize :: Maybe RemoteConfig -> ChunkSize +chunkSize Nothing = Nothing +chunkSize (Just m) = + case M.lookup "chunksize" m of + Nothing -> Nothing + Just v -> case readSize dataUnits v of + Nothing -> error "bad chunksize" + Just size + | size <= 0 -> error "bad chunksize" + | otherwise -> Just $ fromInteger size + +{- This is an extension that's added to the usual file (or whatever) + - where the remote stores a key. -} +type ChunkExt = String + +{- A record of the number of chunks used. + - + - While this can be guessed at based on the size of the key, encryption + - makes that larger. Also, using this helps deal with changes to chunksize + - over the life of a remote. + -} +chunkCount :: ChunkExt +chunkCount = ".chunkcount" + +{- Parses the String from the chunkCount file, and returns the files that + - are used to store the chunks. -} +getChunks :: FilePath -> String -> [FilePath] +getChunks basedest chunkcount = take count $ map (basedest ++) chunkStream + where + count = fromMaybe 0 $ readish chunkcount + +{- An infinite stream of extensions to use for chunks. -} +chunkStream :: [ChunkExt] +chunkStream = map (\n -> ".chunk" ++ show n) [1 :: Integer ..] + +{- Given the base destination to use to store a value, + - generates a stream of temporary destinations (just one when not chunking) + - and passes it to an action, which should chunk and store the data, + - and return the destinations it stored to, or [] on error. + - + - Then calles the finalizer to rename the temporary destinations into + - their final places (and do any other cleanup), and writes the chunk count + - (if chunking) + -} +storeChunks :: FilePath -> ChunkSize -> ([FilePath] -> IO [FilePath]) -> (FilePath -> String -> IO ()) -> (FilePath -> FilePath -> IO ()) -> IO Bool +storeChunks basedest chunksize storer recorder finalizer = + either (const $ return False) return + =<< (E.try go :: IO (Either E.SomeException Bool)) + where + go = do + stored <- storer tmpdests + forM_ stored $ \d -> do + let dest = detmpprefix d + finalizer d dest + when (chunksize /= Nothing) $ do + let chunkcount = basedest ++ chunkCount + recorder chunkcount (show $ length stored) + return (not $ null stored) + + tmpprefix = ".tmp" + detmpprefix f = take (length f - tmpprefixlen) f + tmpprefixlen = length tmpprefix + tmpdests + | chunksize == Nothing = [basedest ++ tmpprefix] + | otherwise = map (++ tmpprefix) $ map (basedest ++) chunkStream + +{- Given a list of destinations to use, chunks the data according to the + - ChunkSize, and runs the storer action to store each chunk. Returns + - the destinations where data was stored, or [] on error. + - + - This buffers each chunk in memory. + - More optimal versions of this can be written, that rely + - on L.toChunks to split the lazy bytestring into chunks (typically + - smaller than the ChunkSize), and eg, write those chunks to a Handle. + - But this is the best that can be done with the storer interface that + - writes a whole L.ByteString at a time. + -} +storeChunked :: ChunkSize -> [FilePath] -> (FilePath -> L.ByteString -> IO ()) -> L.ByteString -> IO [FilePath] +storeChunked chunksize dests storer content = + either (const $ return []) return + =<< (E.try (go chunksize dests) :: IO (Either E.SomeException [FilePath])) + where + go _ [] = return [] -- no dests!? + + go Nothing (d:_) = do + storer d content + return [d] + + go (Just sz) _ + -- always write a chunk, even if the data is 0 bytes + | L.null content = go Nothing dests + | otherwise = storechunks sz [] dests content + + storechunks _ _ [] _ = return [] -- ran out of dests + storechunks sz useddests (d:ds) b + | L.null b = return $ reverse useddests + | otherwise = do + let (chunk, b') = L.splitAt sz b + storer d chunk + storechunks sz (d:useddests) ds b' |