summaryrefslogtreecommitdiff
path: root/Remote/Helper/Chunked.hs
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@debian.org>2014-04-02 21:42:53 +0100
committerGravatar Joey Hess <joeyh@debian.org>2014-04-02 21:42:53 +0100
commit6da7cdf0fbf26f1faf7d5710e6ed488f1a4e9589 (patch)
tree7a903e2eca579335b7ce73d0220854e7a25c3bb9 /Remote/Helper/Chunked.hs
git-annex (5.20140402) unstable; urgency=medium
* unannex, uninit: Avoid committing after every file is unannexed, for massive speedup. * --notify-finish switch will cause desktop notifications after each file upload/download/drop completes (using the dbus Desktop Notifications Specification) * --notify-start switch will show desktop notifications when each file upload/download starts. * webapp: Automatically install Nautilus integration scripts to get and drop files. * tahoe: Pass -d parameter before subcommand; putting it after the subcommand no longer works with tahoe-lafs version 1.10. (Thanks, Alberto Berti) * forget --drop-dead: Avoid removing the dead remote from the trust.log, so that if git remotes for it still exist anywhere, git annex info will still know it's dead and not show it. * git-annex-shell: Make configlist automatically initialize a remote git repository, as long as a git-annex branch has been pushed to it, to simplify setup of remote git repositories, including via gitolite. * add --include-dotfiles: New option, perhaps useful for backups. * Version 5.20140227 broke creation of glacier repositories, not including the datacenter and vault in their configuration. This bug is fixed, but glacier repositories set up with the broken version of git-annex need to have the datacenter and vault set in order to be usable. This can be done using git annex enableremote to add the missing settings. For details, see http://git-annex.branchable.com/bugs/problems_with_glacier/ * Added required content configuration. * assistant: Improve ssh authorized keys line generated in local pairing or for a remote ssh server to set environment variables in an alternative way that works with the non-POSIX fish shell, as well as POSIX shells. # imported from the archive
Diffstat (limited to 'Remote/Helper/Chunked.hs')
-rw-r--r--Remote/Helper/Chunked.hs144
1 files changed, 144 insertions, 0 deletions
diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs
new file mode 100644
index 000000000..ad3b04d49
--- /dev/null
+++ b/Remote/Helper/Chunked.hs
@@ -0,0 +1,144 @@
+{- git-annex chunked remotes
+ -
+ - Copyright 2012 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Remote.Helper.Chunked where
+
+import Common.Annex
+import Utility.DataUnits
+import Types.Remote
+import Utility.Metered
+
+import qualified Data.Map as M
+import qualified Data.ByteString.Lazy as L
+import Data.Int
+import qualified Control.Exception as E
+
+type ChunkSize = Maybe Int64
+
+{- Gets a remote's configured chunk size. -}
+chunkSize :: RemoteConfig -> ChunkSize
+chunkSize m =
+ case M.lookup "chunksize" m of
+ Nothing -> Nothing
+ Just v -> case readSize dataUnits v of
+ Nothing -> error "bad chunksize"
+ Just size
+ | size <= 0 -> error "bad chunksize"
+ | otherwise -> Just $ fromInteger size
+
+{- This is an extension that's added to the usual file (or whatever)
+ - where the remote stores a key. -}
+type ChunkExt = String
+
+{- A record of the number of chunks used.
+ -
+ - While this can be guessed at based on the size of the key, encryption
+ - makes that larger. Also, using this helps deal with changes to chunksize
+ - over the life of a remote.
+ -}
+chunkCount :: ChunkExt
+chunkCount = ".chunkcount"
+
+{- An infinite stream of extensions to use for chunks. -}
+chunkStream :: [ChunkExt]
+chunkStream = map (\n -> ".chunk" ++ show n) [1 :: Integer ..]
+
+{- Parses the String from the chunkCount file, and returns the files that
+ - are used to store the chunks. -}
+listChunks :: FilePath -> String -> [FilePath]
+listChunks basedest chunkcount = take count $ map (basedest ++) chunkStream
+ where
+ count = fromMaybe 0 $ readish chunkcount
+
+{- For use when there is no chunkCount file; uses the action to find
+ - chunks, and returns them, or Nothing if none found. Relies on
+ - storeChunks's finalizer atomically moving the chunks into place once all
+ - are written.
+ -
+ - This is only needed to work around a bug that caused the chunkCount file
+ - not to be written.
+ -}
+probeChunks :: FilePath -> (FilePath -> IO Bool) -> IO [FilePath]
+probeChunks basedest check = go [] $ map (basedest ++) chunkStream
+ where
+ go l [] = return (reverse l)
+ go l (c:cs) = ifM (check c)
+ ( go (c:l) cs
+ , go l []
+ )
+
+{- Given the base destination to use to store a value,
+ - generates a stream of temporary destinations (just one when not chunking)
+ - and passes it to an action, which should chunk and store the data,
+ - and return the destinations it stored to, or [] on error. Then
+ - calls the recorder to write the chunk count (if chunking). Finally, the
+ - finalizer is called to rename the tmp into the dest
+ - (and do any other cleanup).
+ -}
+storeChunks :: Key -> FilePath -> FilePath -> ChunkSize -> ([FilePath] -> IO [FilePath]) -> (FilePath -> String -> IO ()) -> (FilePath -> FilePath -> IO ()) -> IO Bool
+storeChunks key tmp dest chunksize storer recorder finalizer = either onerr return
+ =<< (E.try go :: IO (Either E.SomeException Bool))
+ where
+ go = do
+ stored <- storer tmpdests
+ when (isJust chunksize) $ do
+ let chunkcount = basef ++ chunkCount
+ recorder chunkcount (show $ length stored)
+ finalizer tmp dest
+ return (not $ null stored)
+ onerr e = do
+ print e
+ return False
+
+ basef = tmp ++ keyFile key
+ tmpdests
+ | isNothing chunksize = [basef]
+ | otherwise = map (basef ++ ) chunkStream
+
+{- Given a list of destinations to use, chunks the data according to the
+ - ChunkSize, and runs the storer action to store each chunk. Returns
+ - the destinations where data was stored, or [] on error.
+ -
+ - This buffers each chunk in memory.
+ - More optimal versions of this can be written, that rely
+ - on L.toChunks to split the lazy bytestring into chunks (typically
+ - smaller than the ChunkSize), and eg, write those chunks to a Handle.
+ - But this is the best that can be done with the storer interface that
+ - writes a whole L.ByteString at a time.
+ -}
+storeChunked :: ChunkSize -> [FilePath] -> (FilePath -> L.ByteString -> IO ()) -> L.ByteString -> IO [FilePath]
+storeChunked chunksize dests storer content = either onerr return
+ =<< (E.try (go chunksize dests) :: IO (Either E.SomeException [FilePath]))
+ where
+ go _ [] = return [] -- no dests!?
+ go Nothing (d:_) = do
+ storer d content
+ return [d]
+ go (Just sz) _
+ -- always write a chunk, even if the data is 0 bytes
+ | L.null content = go Nothing dests
+ | otherwise = storechunks sz [] dests content
+
+ onerr e = do
+ print e
+ return []
+
+ storechunks _ _ [] _ = return [] -- ran out of dests
+ storechunks sz useddests (d:ds) b
+ | L.null b = return $ reverse useddests
+ | otherwise = do
+ let (chunk, b') = L.splitAt sz b
+ storer d chunk
+ storechunks sz (d:useddests) ds b'
+
+{- Writes a series of chunks to a file. The feeder is called to get
+ - each chunk. -}
+meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO ()
+meteredWriteFileChunks meterupdate dest chunks feeder =
+ withBinaryFile dest WriteMode $ \h ->
+ forM_ chunks $
+ meteredWrite meterupdate h <=< feeder