diff options
Diffstat (limited to 'Remote/Helper/Chunked.hs')
-rw-r--r-- | Remote/Helper/Chunked.hs | 144 |
1 files changed, 144 insertions, 0 deletions
diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs new file mode 100644 index 000000000..ad3b04d49 --- /dev/null +++ b/Remote/Helper/Chunked.hs @@ -0,0 +1,144 @@ +{- git-annex chunked remotes + - + - Copyright 2012 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Remote.Helper.Chunked where + +import Common.Annex +import Utility.DataUnits +import Types.Remote +import Utility.Metered + +import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L +import Data.Int +import qualified Control.Exception as E + +type ChunkSize = Maybe Int64 + +{- Gets a remote's configured chunk size. -} +chunkSize :: RemoteConfig -> ChunkSize +chunkSize m = + case M.lookup "chunksize" m of + Nothing -> Nothing + Just v -> case readSize dataUnits v of + Nothing -> error "bad chunksize" + Just size + | size <= 0 -> error "bad chunksize" + | otherwise -> Just $ fromInteger size + +{- This is an extension that's added to the usual file (or whatever) + - where the remote stores a key. -} +type ChunkExt = String + +{- A record of the number of chunks used. + - + - While this can be guessed at based on the size of the key, encryption + - makes that larger. Also, using this helps deal with changes to chunksize + - over the life of a remote. + -} +chunkCount :: ChunkExt +chunkCount = ".chunkcount" + +{- An infinite stream of extensions to use for chunks. -} +chunkStream :: [ChunkExt] +chunkStream = map (\n -> ".chunk" ++ show n) [1 :: Integer ..] + +{- Parses the String from the chunkCount file, and returns the files that + - are used to store the chunks. -} +listChunks :: FilePath -> String -> [FilePath] +listChunks basedest chunkcount = take count $ map (basedest ++) chunkStream + where + count = fromMaybe 0 $ readish chunkcount + +{- For use when there is no chunkCount file; uses the action to find + - chunks, and returns them, or Nothing if none found. Relies on + - storeChunks's finalizer atomically moving the chunks into place once all + - are written. + - + - This is only needed to work around a bug that caused the chunkCount file + - not to be written. + -} +probeChunks :: FilePath -> (FilePath -> IO Bool) -> IO [FilePath] +probeChunks basedest check = go [] $ map (basedest ++) chunkStream + where + go l [] = return (reverse l) + go l (c:cs) = ifM (check c) + ( go (c:l) cs + , go l [] + ) + +{- Given the base destination to use to store a value, + - generates a stream of temporary destinations (just one when not chunking) + - and passes it to an action, which should chunk and store the data, + - and return the destinations it stored to, or [] on error. Then + - calls the recorder to write the chunk count (if chunking). Finally, the + - finalizer is called to rename the tmp into the dest + - (and do any other cleanup). + -} +storeChunks :: Key -> FilePath -> FilePath -> ChunkSize -> ([FilePath] -> IO [FilePath]) -> (FilePath -> String -> IO ()) -> (FilePath -> FilePath -> IO ()) -> IO Bool +storeChunks key tmp dest chunksize storer recorder finalizer = either onerr return + =<< (E.try go :: IO (Either E.SomeException Bool)) + where + go = do + stored <- storer tmpdests + when (isJust chunksize) $ do + let chunkcount = basef ++ chunkCount + recorder chunkcount (show $ length stored) + finalizer tmp dest + return (not $ null stored) + onerr e = do + print e + return False + + basef = tmp ++ keyFile key + tmpdests + | isNothing chunksize = [basef] + | otherwise = map (basef ++ ) chunkStream + +{- Given a list of destinations to use, chunks the data according to the + - ChunkSize, and runs the storer action to store each chunk. Returns + - the destinations where data was stored, or [] on error. + - + - This buffers each chunk in memory. + - More optimal versions of this can be written, that rely + - on L.toChunks to split the lazy bytestring into chunks (typically + - smaller than the ChunkSize), and eg, write those chunks to a Handle. + - But this is the best that can be done with the storer interface that + - writes a whole L.ByteString at a time. + -} +storeChunked :: ChunkSize -> [FilePath] -> (FilePath -> L.ByteString -> IO ()) -> L.ByteString -> IO [FilePath] +storeChunked chunksize dests storer content = either onerr return + =<< (E.try (go chunksize dests) :: IO (Either E.SomeException [FilePath])) + where + go _ [] = return [] -- no dests!? + go Nothing (d:_) = do + storer d content + return [d] + go (Just sz) _ + -- always write a chunk, even if the data is 0 bytes + | L.null content = go Nothing dests + | otherwise = storechunks sz [] dests content + + onerr e = do + print e + return [] + + storechunks _ _ [] _ = return [] -- ran out of dests + storechunks sz useddests (d:ds) b + | L.null b = return $ reverse useddests + | otherwise = do + let (chunk, b') = L.splitAt sz b + storer d chunk + storechunks sz (d:useddests) ds b' + +{- Writes a series of chunks to a file. The feeder is called to get + - each chunk. -} +meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO () +meteredWriteFileChunks meterupdate dest chunks feeder = + withBinaryFile dest WriteMode $ \h -> + forM_ chunks $ + meteredWrite meterupdate h <=< feeder |