summaryrefslogtreecommitdiff
path: root/Remote
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2012-11-16 17:58:08 -0400
committerGravatar Joey Hess <joey@kitenet.net>2012-11-16 17:58:08 -0400
commit9fc8fc98a9279b822e5c0104fee8dab00debda90 (patch)
tree891e5bd35975a312bff502fb0afa5369a32a18cc /Remote
parent54399a70122023a5279566080f8ba82f889bccf8 (diff)
generic chunked content helper
However, directory still uses its optimzed chunked file writer, as it uses less memory than the generic one in the helper.
Diffstat (limited to 'Remote')
-rw-r--r--Remote/Directory.hs76
-rw-r--r--Remote/Helper/Chunked.hs121
2 files changed, 137 insertions, 60 deletions
diff --git a/Remote/Directory.hs b/Remote/Directory.hs
index 006638a2f..6bf725379 100644
--- a/Remote/Directory.hs
+++ b/Remote/Directory.hs
@@ -19,8 +19,8 @@ import Config
import Utility.FileMode
import Remote.Helper.Special
import Remote.Helper.Encryptable
+import Remote.Helper.Chunked
import Crypto
-import Utility.DataUnits
import Data.Int
import Annex.Content
@@ -58,19 +58,6 @@ gen r u c = do
remotetype = remote
}
-type ChunkSize = Maybe Int64
-
-chunkSize :: Maybe RemoteConfig -> ChunkSize
-chunkSize Nothing = Nothing
-chunkSize (Just m) =
- case M.lookup "chunksize" m of
- Nothing -> Nothing
- Just v -> case readSize dataUnits v of
- Nothing -> error "bad chunksize"
- Just size
- | size <= 0 -> error "bad chunksize"
- | otherwise -> Just $ fromInteger size
-
directorySetup :: UUID -> RemoteConfig -> Annex RemoteConfig
directorySetup u c = do
-- verify configuration is sane
@@ -89,14 +76,6 @@ directorySetup u c = do
locations :: FilePath -> Key -> [FilePath]
locations d k = map (d </>) (keyPaths k)
-{- An infinite stream of chunks to use for a given file. -}
-chunkStream :: FilePath -> [FilePath]
-chunkStream f = map (\n -> f ++ ".chunk" ++ show n) [1 :: Integer ..]
-
-{- A file that records the number of chunks used. -}
-chunkCount :: FilePath -> FilePath
-chunkCount f = f ++ ".chunkcount"
-
withCheckedFiles :: (FilePath -> IO Bool) -> ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
withCheckedFiles _ _ [] _ _ = return False
withCheckedFiles check Nothing d k a = go $ locations d k
@@ -107,18 +86,14 @@ withCheckedFiles check (Just _) d k a = go $ locations d k
where
go [] = return False
go (f:fs) = do
- let chunkcount = chunkCount f
+ let chunkcount = f ++ chunkCount
ifM (check chunkcount)
( do
- count <- readcount chunkcount
- let chunks = take count $ chunkStream f
+ chunks <- getChunks f <$> readFile chunkcount
ifM (all id <$> mapM check chunks)
( a chunks , return False )
, go fs
)
- readcount f = fromMaybe (error $ "cannot parse " ++ f)
- . (readish :: String -> Maybe Int)
- <$> readFile f
withStoredFiles :: ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
withStoredFiles = withCheckedFiles doesFileExist
@@ -203,45 +178,26 @@ meteredWriteFile' meterupdate dest startstate feeder =
meterupdate $ toInteger $ S.length c
feed state cs h
-{- Generates a list of destinations to write to in order to store a key.
- - When chunksize is specified, this list will be a list of chunks.
- - The action should store the file, and return a list of the destinations
- - it stored it to, or [] on error.
- - The stored files are only put into their final place once storage is
- - complete.
- -}
storeHelper :: FilePath -> ChunkSize -> Key -> ([FilePath] -> IO [FilePath]) -> Annex Bool
-storeHelper d chunksize key a = prep <&&> check <&&> go
+storeHelper d chunksize key storer = check <&&> go
where
- desttemplate = Prelude.head $ locations d key
- dir = parentDir desttemplate
- tmpdests = case chunksize of
- Nothing -> [desttemplate ++ tmpprefix]
- Just _ -> map (++ tmpprefix) (chunkStream desttemplate)
- tmpprefix = ".tmp"
- detmpprefix f = take (length f - tmpprefixlen) f
- tmpprefixlen = length tmpprefix
- prep = liftIO $ catchBoolIO $ do
- createDirectoryIfMissing True dir
- allowWrite dir
- return True
+ basedest = Prelude.head $ locations d key
+ dir = parentDir basedest
{- The size is not exactly known when encrypting the key;
- this assumes that at least the size of the key is
- needed as free space. -}
check = checkDiskSpace (Just dir) key 0
go = liftIO $ catchBoolIO $ do
- stored <- a tmpdests
- forM_ stored $ \f -> do
- let dest = detmpprefix f
- renameFile f dest
- preventWrite dest
- when (chunksize /= Nothing) $ do
- let chunkcount = chunkCount desttemplate
- _ <- tryIO $ allowWrite chunkcount
- writeFile chunkcount (show $ length stored)
- preventWrite chunkcount
- preventWrite dir
- return (not $ null stored)
+ createDirectoryIfMissing True dir
+ allowWrite dir
+ preventWrite dir `after` storeChunks basedest chunksize storer recorder finalizer
+ finalizer f dest = do
+ renameFile f dest
+ preventWrite dest
+ recorder f s = do
+ void $ tryIO $ allowWrite f
+ writeFile f s
+ preventWrite f
retrieve :: FilePath -> ChunkSize -> Key -> AssociatedFile -> FilePath -> Annex Bool
retrieve d chunksize k _ f = metered Nothing k $ \meterupdate ->
diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs
new file mode 100644
index 000000000..59117eaca
--- /dev/null
+++ b/Remote/Helper/Chunked.hs
@@ -0,0 +1,121 @@
+{- git-annex chunked remotes
+ -
+ - Copyright 2012 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Remote.Helper.Chunked where
+
+import Common.Annex
+import Utility.DataUnits
+import Types.Remote
+
+import qualified Data.Map as M
+import qualified Data.ByteString.Lazy as L
+import Data.Int
+import qualified Control.Exception as E
+
+type ChunkSize = Maybe Int64
+
+{- Gets a remote's configured chunk size. -}
+chunkSize :: Maybe RemoteConfig -> ChunkSize
+chunkSize Nothing = Nothing
+chunkSize (Just m) =
+ case M.lookup "chunksize" m of
+ Nothing -> Nothing
+ Just v -> case readSize dataUnits v of
+ Nothing -> error "bad chunksize"
+ Just size
+ | size <= 0 -> error "bad chunksize"
+ | otherwise -> Just $ fromInteger size
+
+{- This is an extension that's added to the usual file (or whatever)
+ - where the remote stores a key. -}
+type ChunkExt = String
+
+{- A record of the number of chunks used.
+ -
+ - While this can be guessed at based on the size of the key, encryption
+ - makes that larger. Also, using this helps deal with changes to chunksize
+ - over the life of a remote.
+ -}
+chunkCount :: ChunkExt
+chunkCount = ".chunkcount"
+
+{- Parses the String from the chunkCount file, and returns the files that
+ - are used to store the chunks. -}
+getChunks :: FilePath -> String -> [FilePath]
+getChunks basedest chunkcount = take count $ map (basedest ++) chunkStream
+ where
+ count = fromMaybe 0 $ readish chunkcount
+
+{- An infinite stream of extensions to use for chunks. -}
+chunkStream :: [ChunkExt]
+chunkStream = map (\n -> ".chunk" ++ show n) [1 :: Integer ..]
+
+{- Given the base destination to use to store a value,
+ - generates a stream of temporary destinations (just one when not chunking)
+ - and passes it to an action, which should chunk and store the data,
+ - and return the destinations it stored to, or [] on error.
+ -
+ - Then calles the finalizer to rename the temporary destinations into
+ - their final places (and do any other cleanup), and writes the chunk count
+ - (if chunking)
+ -}
+storeChunks :: FilePath -> ChunkSize -> ([FilePath] -> IO [FilePath]) -> (FilePath -> String -> IO ()) -> (FilePath -> FilePath -> IO ()) -> IO Bool
+storeChunks basedest chunksize storer recorder finalizer =
+ either (const $ return False) return
+ =<< (E.try go :: IO (Either E.SomeException Bool))
+ where
+ go = do
+ stored <- storer tmpdests
+ forM_ stored $ \d -> do
+ let dest = detmpprefix d
+ finalizer d dest
+ when (chunksize /= Nothing) $ do
+ let chunkcount = basedest ++ chunkCount
+ recorder chunkcount (show $ length stored)
+ return (not $ null stored)
+
+ tmpprefix = ".tmp"
+ detmpprefix f = take (length f - tmpprefixlen) f
+ tmpprefixlen = length tmpprefix
+ tmpdests
+ | chunksize == Nothing = [basedest ++ tmpprefix]
+ | otherwise = map (++ tmpprefix) $ map (basedest ++) chunkStream
+
+{- Given a list of destinations to use, chunks the data according to the
+ - ChunkSize, and runs the storer action to store each chunk. Returns
+ - the destinations where data was stored, or [] on error.
+ -
+ - This buffers each chunk in memory.
+ - More optimal versions of this can be written, that rely
+ - on L.toChunks to split the lazy bytestring into chunks (typically
+ - smaller than the ChunkSize), and eg, write those chunks to a Handle.
+ - But this is the best that can be done with the storer interface that
+ - writes a whole L.ByteString at a time.
+ -}
+storeChunked :: ChunkSize -> [FilePath] -> (FilePath -> L.ByteString -> IO ()) -> L.ByteString -> IO [FilePath]
+storeChunked chunksize dests storer content =
+ either (const $ return []) return
+ =<< (E.try (go chunksize dests) :: IO (Either E.SomeException [FilePath]))
+ where
+ go _ [] = return [] -- no dests!?
+
+ go Nothing (d:_) = do
+ storer d content
+ return [d]
+
+ go (Just sz) _
+ -- always write a chunk, even if the data is 0 bytes
+ | L.null content = go Nothing dests
+ | otherwise = storechunks sz [] dests content
+
+ storechunks _ _ [] _ = return [] -- ran out of dests
+ storechunks sz useddests (d:ds) b
+ | L.null b = return $ reverse useddests
+ | otherwise = do
+ let (chunk, b') = L.splitAt sz b
+ storer d chunk
+ storechunks sz (d:useddests) ds b'