From 9fc8fc98a9279b822e5c0104fee8dab00debda90 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 16 Nov 2012 17:58:08 -0400 Subject: generic chunked content helper However, directory still uses its optimzed chunked file writer, as it uses less memory than the generic one in the helper. --- Remote/Directory.hs | 76 +++++++---------------------- Remote/Helper/Chunked.hs | 121 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 60 deletions(-) create mode 100644 Remote/Helper/Chunked.hs (limited to 'Remote') diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 006638a2f..6bf725379 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -19,8 +19,8 @@ import Config import Utility.FileMode import Remote.Helper.Special import Remote.Helper.Encryptable +import Remote.Helper.Chunked import Crypto -import Utility.DataUnits import Data.Int import Annex.Content @@ -58,19 +58,6 @@ gen r u c = do remotetype = remote } -type ChunkSize = Maybe Int64 - -chunkSize :: Maybe RemoteConfig -> ChunkSize -chunkSize Nothing = Nothing -chunkSize (Just m) = - case M.lookup "chunksize" m of - Nothing -> Nothing - Just v -> case readSize dataUnits v of - Nothing -> error "bad chunksize" - Just size - | size <= 0 -> error "bad chunksize" - | otherwise -> Just $ fromInteger size - directorySetup :: UUID -> RemoteConfig -> Annex RemoteConfig directorySetup u c = do -- verify configuration is sane @@ -89,14 +76,6 @@ directorySetup u c = do locations :: FilePath -> Key -> [FilePath] locations d k = map (d ) (keyPaths k) -{- An infinite stream of chunks to use for a given file. -} -chunkStream :: FilePath -> [FilePath] -chunkStream f = map (\n -> f ++ ".chunk" ++ show n) [1 :: Integer ..] - -{- A file that records the number of chunks used. -} -chunkCount :: FilePath -> FilePath -chunkCount f = f ++ ".chunkcount" - withCheckedFiles :: (FilePath -> IO Bool) -> ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool withCheckedFiles _ _ [] _ _ = return False withCheckedFiles check Nothing d k a = go $ locations d k @@ -107,18 +86,14 @@ withCheckedFiles check (Just _) d k a = go $ locations d k where go [] = return False go (f:fs) = do - let chunkcount = chunkCount f + let chunkcount = f ++ chunkCount ifM (check chunkcount) ( do - count <- readcount chunkcount - let chunks = take count $ chunkStream f + chunks <- getChunks f <$> readFile chunkcount ifM (all id <$> mapM check chunks) ( a chunks , return False ) , go fs ) - readcount f = fromMaybe (error $ "cannot parse " ++ f) - . (readish :: String -> Maybe Int) - <$> readFile f withStoredFiles :: ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool withStoredFiles = withCheckedFiles doesFileExist @@ -203,45 +178,26 @@ meteredWriteFile' meterupdate dest startstate feeder = meterupdate $ toInteger $ S.length c feed state cs h -{- Generates a list of destinations to write to in order to store a key. - - When chunksize is specified, this list will be a list of chunks. - - The action should store the file, and return a list of the destinations - - it stored it to, or [] on error. - - The stored files are only put into their final place once storage is - - complete. - -} storeHelper :: FilePath -> ChunkSize -> Key -> ([FilePath] -> IO [FilePath]) -> Annex Bool -storeHelper d chunksize key a = prep <&&> check <&&> go +storeHelper d chunksize key storer = check <&&> go where - desttemplate = Prelude.head $ locations d key - dir = parentDir desttemplate - tmpdests = case chunksize of - Nothing -> [desttemplate ++ tmpprefix] - Just _ -> map (++ tmpprefix) (chunkStream desttemplate) - tmpprefix = ".tmp" - detmpprefix f = take (length f - tmpprefixlen) f - tmpprefixlen = length tmpprefix - prep = liftIO $ catchBoolIO $ do - createDirectoryIfMissing True dir - allowWrite dir - return True + basedest = Prelude.head $ locations d key + dir = parentDir basedest {- The size is not exactly known when encrypting the key; - this assumes that at least the size of the key is - needed as free space. -} check = checkDiskSpace (Just dir) key 0 go = liftIO $ catchBoolIO $ do - stored <- a tmpdests - forM_ stored $ \f -> do - let dest = detmpprefix f - renameFile f dest - preventWrite dest - when (chunksize /= Nothing) $ do - let chunkcount = chunkCount desttemplate - _ <- tryIO $ allowWrite chunkcount - writeFile chunkcount (show $ length stored) - preventWrite chunkcount - preventWrite dir - return (not $ null stored) + createDirectoryIfMissing True dir + allowWrite dir + preventWrite dir `after` storeChunks basedest chunksize storer recorder finalizer + finalizer f dest = do + renameFile f dest + preventWrite dest + recorder f s = do + void $ tryIO $ allowWrite f + writeFile f s + preventWrite f retrieve :: FilePath -> ChunkSize -> Key -> AssociatedFile -> FilePath -> Annex Bool retrieve d chunksize k _ f = metered Nothing k $ \meterupdate -> diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs new file mode 100644 index 000000000..59117eaca --- /dev/null +++ b/Remote/Helper/Chunked.hs @@ -0,0 +1,121 @@ +{- git-annex chunked remotes + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Remote.Helper.Chunked where + +import Common.Annex +import Utility.DataUnits +import Types.Remote + +import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L +import Data.Int +import qualified Control.Exception as E + +type ChunkSize = Maybe Int64 + +{- Gets a remote's configured chunk size. -} +chunkSize :: Maybe RemoteConfig -> ChunkSize +chunkSize Nothing = Nothing +chunkSize (Just m) = + case M.lookup "chunksize" m of + Nothing -> Nothing + Just v -> case readSize dataUnits v of + Nothing -> error "bad chunksize" + Just size + | size <= 0 -> error "bad chunksize" + | otherwise -> Just $ fromInteger size + +{- This is an extension that's added to the usual file (or whatever) + - where the remote stores a key. -} +type ChunkExt = String + +{- A record of the number of chunks used. + - + - While this can be guessed at based on the size of the key, encryption + - makes that larger. Also, using this helps deal with changes to chunksize + - over the life of a remote. + -} +chunkCount :: ChunkExt +chunkCount = ".chunkcount" + +{- Parses the String from the chunkCount file, and returns the files that + - are used to store the chunks. -} +getChunks :: FilePath -> String -> [FilePath] +getChunks basedest chunkcount = take count $ map (basedest ++) chunkStream + where + count = fromMaybe 0 $ readish chunkcount + +{- An infinite stream of extensions to use for chunks. -} +chunkStream :: [ChunkExt] +chunkStream = map (\n -> ".chunk" ++ show n) [1 :: Integer ..] + +{- Given the base destination to use to store a value, + - generates a stream of temporary destinations (just one when not chunking) + - and passes it to an action, which should chunk and store the data, + - and return the destinations it stored to, or [] on error. + - + - Then calles the finalizer to rename the temporary destinations into + - their final places (and do any other cleanup), and writes the chunk count + - (if chunking) + -} +storeChunks :: FilePath -> ChunkSize -> ([FilePath] -> IO [FilePath]) -> (FilePath -> String -> IO ()) -> (FilePath -> FilePath -> IO ()) -> IO Bool +storeChunks basedest chunksize storer recorder finalizer = + either (const $ return False) return + =<< (E.try go :: IO (Either E.SomeException Bool)) + where + go = do + stored <- storer tmpdests + forM_ stored $ \d -> do + let dest = detmpprefix d + finalizer d dest + when (chunksize /= Nothing) $ do + let chunkcount = basedest ++ chunkCount + recorder chunkcount (show $ length stored) + return (not $ null stored) + + tmpprefix = ".tmp" + detmpprefix f = take (length f - tmpprefixlen) f + tmpprefixlen = length tmpprefix + tmpdests + | chunksize == Nothing = [basedest ++ tmpprefix] + | otherwise = map (++ tmpprefix) $ map (basedest ++) chunkStream + +{- Given a list of destinations to use, chunks the data according to the + - ChunkSize, and runs the storer action to store each chunk. Returns + - the destinations where data was stored, or [] on error. + - + - This buffers each chunk in memory. + - More optimal versions of this can be written, that rely + - on L.toChunks to split the lazy bytestring into chunks (typically + - smaller than the ChunkSize), and eg, write those chunks to a Handle. + - But this is the best that can be done with the storer interface that + - writes a whole L.ByteString at a time. + -} +storeChunked :: ChunkSize -> [FilePath] -> (FilePath -> L.ByteString -> IO ()) -> L.ByteString -> IO [FilePath] +storeChunked chunksize dests storer content = + either (const $ return []) return + =<< (E.try (go chunksize dests) :: IO (Either E.SomeException [FilePath])) + where + go _ [] = return [] -- no dests!? + + go Nothing (d:_) = do + storer d content + return [d] + + go (Just sz) _ + -- always write a chunk, even if the data is 0 bytes + | L.null content = go Nothing dests + | otherwise = storechunks sz [] dests content + + storechunks _ _ [] _ = return [] -- ran out of dests + storechunks sz useddests (d:ds) b + | L.null b = return $ reverse useddests + | otherwise = do + let (chunk, b') = L.splitAt sz b + storer d chunk + storechunks sz (d:useddests) ds b' -- cgit v1.2.3