summaryrefslogtreecommitdiff
path: root/Remote/Helper/Chunked.hs
diff options
context:
space:
mode:
Diffstat (limited to 'Remote/Helper/Chunked.hs')
-rw-r--r--Remote/Helper/Chunked.hs144
1 files changed, 144 insertions, 0 deletions
diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs
new file mode 100644
index 000000000..ad3b04d49
--- /dev/null
+++ b/Remote/Helper/Chunked.hs
@@ -0,0 +1,144 @@
+{- git-annex chunked remotes
+ -
+ - Copyright 2012 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Remote.Helper.Chunked where
+
+import Common.Annex
+import Utility.DataUnits
+import Types.Remote
+import Utility.Metered
+
+import qualified Data.Map as M
+import qualified Data.ByteString.Lazy as L
+import Data.Int
+import qualified Control.Exception as E
+
+type ChunkSize = Maybe Int64
+
+{- Gets a remote's configured chunk size. -}
+chunkSize :: RemoteConfig -> ChunkSize
+chunkSize m =
+ case M.lookup "chunksize" m of
+ Nothing -> Nothing
+ Just v -> case readSize dataUnits v of
+ Nothing -> error "bad chunksize"
+ Just size
+ | size <= 0 -> error "bad chunksize"
+ | otherwise -> Just $ fromInteger size
+
+{- This is an extension that's added to the usual file (or whatever)
+ - where the remote stores a key. -}
+type ChunkExt = String
+
+{- A record of the number of chunks used.
+ -
+ - While this can be guessed at based on the size of the key, encryption
+ - makes that larger. Also, using this helps deal with changes to chunksize
+ - over the life of a remote.
+ -}
+chunkCount :: ChunkExt
+chunkCount = ".chunkcount"
+
+{- An infinite stream of extensions to use for chunks. -}
+chunkStream :: [ChunkExt]
+chunkStream = map (\n -> ".chunk" ++ show n) [1 :: Integer ..]
+
+{- Parses the String from the chunkCount file, and returns the files that
+ - are used to store the chunks. -}
+listChunks :: FilePath -> String -> [FilePath]
+listChunks basedest chunkcount = take count $ map (basedest ++) chunkStream
+ where
+ count = fromMaybe 0 $ readish chunkcount
+
+{- For use when there is no chunkCount file; uses the action to find
+ - chunks, and returns them, or Nothing if none found. Relies on
+ - storeChunks's finalizer atomically moving the chunks into place once all
+ - are written.
+ -
+ - This is only needed to work around a bug that caused the chunkCount file
+ - not to be written.
+ -}
+probeChunks :: FilePath -> (FilePath -> IO Bool) -> IO [FilePath]
+probeChunks basedest check = go [] $ map (basedest ++) chunkStream
+ where
+ go l [] = return (reverse l)
+ go l (c:cs) = ifM (check c)
+ ( go (c:l) cs
+ , go l []
+ )
+
+{- Given the base destination to use to store a value,
+ - generates a stream of temporary destinations (just one when not chunking)
+ - and passes it to an action, which should chunk and store the data,
+ - and return the destinations it stored to, or [] on error. Then
+ - calls the recorder to write the chunk count (if chunking). Finally, the
+ - finalizer is called to rename the tmp into the dest
+ - (and do any other cleanup).
+ -}
+storeChunks :: Key -> FilePath -> FilePath -> ChunkSize -> ([FilePath] -> IO [FilePath]) -> (FilePath -> String -> IO ()) -> (FilePath -> FilePath -> IO ()) -> IO Bool
+storeChunks key tmp dest chunksize storer recorder finalizer = either onerr return
+ =<< (E.try go :: IO (Either E.SomeException Bool))
+ where
+ go = do
+ stored <- storer tmpdests
+ when (isJust chunksize) $ do
+ let chunkcount = basef ++ chunkCount
+ recorder chunkcount (show $ length stored)
+ finalizer tmp dest
+ return (not $ null stored)
+ onerr e = do
+ print e
+ return False
+
+ basef = tmp ++ keyFile key
+ tmpdests
+ | isNothing chunksize = [basef]
+ | otherwise = map (basef ++ ) chunkStream
+
+{- Given a list of destinations to use, chunks the data according to the
+ - ChunkSize, and runs the storer action to store each chunk. Returns
+ - the destinations where data was stored, or [] on error.
+ -
+ - This buffers each chunk in memory.
+ - More optimal versions of this can be written, that rely
+ - on L.toChunks to split the lazy bytestring into chunks (typically
+ - smaller than the ChunkSize), and eg, write those chunks to a Handle.
+ - But this is the best that can be done with the storer interface that
+ - writes a whole L.ByteString at a time.
+ -}
+storeChunked :: ChunkSize -> [FilePath] -> (FilePath -> L.ByteString -> IO ()) -> L.ByteString -> IO [FilePath]
+storeChunked chunksize dests storer content = either onerr return
+ =<< (E.try (go chunksize dests) :: IO (Either E.SomeException [FilePath]))
+ where
+ go _ [] = return [] -- no dests!?
+ go Nothing (d:_) = do
+ storer d content
+ return [d]
+ go (Just sz) _
+ -- always write a chunk, even if the data is 0 bytes
+ | L.null content = go Nothing dests
+ | otherwise = storechunks sz [] dests content
+
+ onerr e = do
+ print e
+ return []
+
+ storechunks _ _ [] _ = return [] -- ran out of dests
+ storechunks sz useddests (d:ds) b
+ | L.null b = return $ reverse useddests
+ | otherwise = do
+ let (chunk, b') = L.splitAt sz b
+ storer d chunk
+ storechunks sz (d:useddests) ds b'
+
+{- Writes a series of chunks to a file. The feeder is called to get
+ - each chunk. -}
+meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO ()
+meteredWriteFileChunks meterupdate dest chunks feeder =
+ withBinaryFile dest WriteMode $ \h ->
+ forM_ chunks $
+ meteredWrite meterupdate h <=< feeder