From 237cceb715438809f9ddf7b45695f000f65f82b8 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 01:12:24 -0400 Subject: better byteRetriever Make the byteRetriever be passed the callback that consumes the bytestring. This way, there's no worries about the lazy bytestring not all being read when the resource that's creating it is closed. Which in turn lets bup, ddar, and S3 each switch from using an unncessary fileRetriver to a byteRetriever. So, more efficient on chunks and encrypted files. The only remaining fileRetrievers are hook and external, which really do retrieve to files. --- Remote/Bup.hs | 13 +++++++------ Remote/Ddar.hs | 12 ++++++------ Remote/Directory.hs | 4 ++-- Remote/Directory/LegacyChunked.hs | 10 +++++----- Remote/Glacier.hs | 33 ++++++++++++++++----------------- Remote/Helper/ChunkedEncryptable.hs | 8 +++++--- Remote/S3.hs | 4 ++-- 7 files changed, 43 insertions(+), 41 deletions(-) (limited to 'Remote') diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 06679c4b8..44ea8c7d8 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -8,6 +8,7 @@ module Remote.Bup (remote) where import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L import Data.ByteString.Lazy.UTF8 (fromString) import Common.Annex @@ -127,12 +128,12 @@ store r buprepo = byteStorer $ \k b p -> do return True retrieve :: BupRepo -> Retriever -retrieve buprepo = fileRetriever $ \d k _p -> - liftIO $ withFile d WriteMode $ \h -> do - let params = bupParams "join" buprepo [Param $ bupRef k] - let p = proc "bup" (toCommand params) - (_, _, _, pid) <- createProcess $ p { std_out = UseHandle h } - forceSuccessProcess p pid +retrieve buprepo = byteRetriever $ \k sink -> do + let params = bupParams "join" buprepo [Param $ bupRef k] + let p = proc "bup" (toCommand params) + (_, Just h, _, pid) <- liftIO $ createProcess $ p { std_out = CreatePipe } + liftIO (hClose h >> forceSuccessProcess p pid) + `after` (sink =<< liftIO (L.hGetContents h)) retrieveCheap :: BupRepo -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index 365506a22..bc4755a81 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -10,8 +10,8 @@ module Remote.Ddar (remote) where import Control.Exception import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L import System.IO.Error -import System.Process import Data.String.Utils import Common.Annex @@ -127,12 +127,12 @@ ddarExtractRemoteCall ddarrepo k = ddarRemoteCall ddarrepo 'x' [Param "--force-stdout", Param $ key2file k] retrieve :: DdarRepo -> Retriever -retrieve ddarrepo = fileRetriever $ \d k _p -> do +retrieve ddarrepo = byteRetriever $ \k sink -> do (cmd, params) <- ddarExtractRemoteCall ddarrepo k - liftIO $ withFile d WriteMode $ \h -> do - let p = (proc cmd $ toCommand params){ std_out = UseHandle h } - (_, _, _, pid) <- Common.Annex.createProcess p - forceSuccessProcess p pid + let p = (proc cmd $ toCommand params) { std_out = CreatePipe } + (_, Just h, _, pid) <- liftIO $ createProcess p + liftIO (hClose h >> forceSuccessProcess p pid) + `after` (sink =<< liftIO (L.hGetContents h)) retrieveCheap :: Key -> FilePath -> Annex Bool retrieveCheap _ _ = return False diff --git a/Remote/Directory.hs b/Remote/Directory.hs index a87987529..78d30b1a1 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -136,8 +136,8 @@ store d chunkconfig k b p = liftIO $ do retrieve :: FilePath -> ChunkConfig -> Preparer Retriever retrieve d (LegacyChunks _) = Legacy.retrieve locations d -retrieve d _ = simplyPrepare $ byteRetriever $ \k -> - liftIO $ L.readFile =<< getLocation d k +retrieve d _ = simplyPrepare $ byteRetriever $ \k sink -> + sink =<< liftIO (L.readFile =<< getLocation d k) retrieveCheap :: FilePath -> ChunkConfig -> Key -> FilePath -> Annex Bool -- no cheap retrieval possible for chunks diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs index 5c200570c..a19868802 100644 --- a/Remote/Directory/LegacyChunked.hs +++ b/Remote/Directory/LegacyChunked.hs @@ -94,14 +94,14 @@ retrieve locations d basek a = do tmpdir <- fromRepo $ gitAnnexTmpMiscDir createAnnexDirectory tmpdir let tmp = tmpdir keyFile basek ++ ".directorylegacy.tmp" - a $ Just $ byteRetriever $ \k -> liftIO $ do - void $ withStoredFiles d locations k $ \fs -> do + a $ Just $ byteRetriever $ \k sink -> do + liftIO $ void $ withStoredFiles d locations k $ \fs -> do forM_ fs $ S.appendFile tmp <=< S.readFile return True - b <- L.readFile tmp - nukeFile tmp - return b + b <- liftIO $ L.readFile tmp + liftIO $ nukeFile tmp + sink b checkPresent :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> Annex (Either String Bool) checkPresent d locations k = liftIO $ catchMsgIO $ diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 9b428bd80..592a7db1f 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -20,7 +20,6 @@ import Config.Cost import Remote.Helper.Special import Remote.Helper.ChunkedEncryptable import qualified Remote.Helper.AWS as AWS -import Crypto import Creds import Utility.Metered import qualified Annex @@ -120,11 +119,10 @@ store r k b p = go =<< glacierEnv c u return True prepareRetrieve :: Remote -> Preparer Retriever -prepareRetrieve r = simplyPrepare $ fileRetriever $ \d k p -> - retrieve r k (readBytes (meteredWriteFile p d)) +prepareRetrieve = simplyPrepare . byteRetriever . retrieve -retrieve :: Remote -> Key -> (Handle -> IO ()) -> Annex () -retrieve r k reader = go =<< glacierEnv c u +retrieve :: Remote -> Key -> (L.ByteString -> Annex Bool) -> Annex Bool +retrieve r k sink = go =<< glacierEnv c u where c = config r u = uuid r @@ -138,17 +136,21 @@ retrieve r k reader = go =<< glacierEnv c u go Nothing = error "cannot retrieve from glacier" go (Just e) = do let cmd = (proc "glacier" (toCommand params)) { env = Just e } - ok <- liftIO $ catchBoolIO $ - withHandle StdoutHandle createProcessSuccess cmd $ \h -> - ifM (hIsEOF h) - ( return False - , do - reader h - return True - ) + (_, Just h, _, pid) <- liftIO $ createProcess cmd + -- Glacier cannot store empty files, so if the output is + -- empty, the content is not available yet. + ok <- ifM (liftIO $ hIsEOF h) + ( return False + , sink =<< liftIO (L.hGetContents h) + ) + liftIO $ hClose h + liftIO $ forceSuccessProcess cmd pid unless ok $ do showLongNote "Recommend you wait up to 4 hours, and then run this command again." - error "not yet available" + return ok + +retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool +retrieveCheap _ _ _ = return False remove :: Remote -> Key -> Annex Bool remove r k = glacierAction r @@ -159,9 +161,6 @@ remove r k = glacierAction r , Param $ archive r k ] -retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool -retrieveCheap _ _ _ = return False - checkPresent :: Remote -> Key -> Annex (Either String Bool) checkPresent r k = do showAction $ "checking " ++ name r diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs index e60771551..9c6ba98a2 100644 --- a/Remote/Helper/ChunkedEncryptable.hs +++ b/Remote/Helper/ChunkedEncryptable.hs @@ -77,9 +77,11 @@ fileRetriever a k m callback = do a f k m callback (FileContent f) --- A Retriever that generates a L.ByteString containing the Key's content. -byteRetriever :: (Key -> Annex L.ByteString) -> Retriever -byteRetriever a k _m callback = callback =<< (ByteContent <$> a k) +-- A Retriever that generates a lazy ByteString containing the Key's +-- content, and passes it to a callback action which will fully consume it +-- before returning. +byteRetriever :: (Key -> (L.ByteString -> Annex Bool) -> Annex Bool) -> Retriever +byteRetriever a k _m callback = a k (callback . ByteContent) {- The base Remote that is provided to chunkedEncryptableRemote - needs to have storeKey and retreiveKeyFile methods, but they are diff --git a/Remote/S3.hs b/Remote/S3.hs index ed9122cab..68d8ee4bf 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -147,9 +147,9 @@ store (conn, bucket) r k p file = do prepareRetrieve :: Remote -> Preparer Retriever prepareRetrieve r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) -> - byteRetriever $ \k -> + byteRetriever $ \k sink -> liftIO (getObject conn $ bucketKey r bucket k) - >>= either s3Error (return . obj_data) + >>= either s3Error (sink . obj_data) retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False -- cgit v1.2.3