diff options
-rw-r--r-- | Remote/Helper/Chunked.hs | 97 | ||||
-rw-r--r-- | Remote/Helper/ChunkedEncryptable.hs | 4 | ||||
-rw-r--r-- | Types/Key.hs | 11 | ||||
-rw-r--r-- | Utility/Metered.hs | 2 | ||||
-rw-r--r-- | debian/changelog | 4 | ||||
-rw-r--r-- | doc/chunking.mdwn | 2 |
6 files changed, 88 insertions, 32 deletions
diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 3415c2df6..9ba6d9cbd 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -1,6 +1,6 @@ {- git-annex chunked remotes - - - Copyright 2012-2014 Joey Hess <joey@kitenet.net> + - Copyright 2014 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} @@ -27,6 +27,7 @@ import Annex.Exception import qualified Data.ByteString.Lazy as L import qualified Data.Map as M +import Control.Exception data ChunkConfig = NoChunks @@ -147,12 +148,16 @@ removeChunks remover u chunkconfig encryptor k = do - - When the remote is chunked, tries each of the options returned by - chunkKeys until it finds one where the retriever successfully - - gets the first key in the list. The content of that key, and any + - gets the first chunked key. The content of that key, and any - other chunks in the list is fed to the sink. - - If retrival of one of the subsequent chunks throws an exception, - gives up and returns False. Note that partial data may have been - written to the sink in this case. + - + - Resuming is supported when using chunks. When the destination file + - already exists, it skips to the next chunked key that would be needed + - to resume. -} retrieveChunks :: (Key -> IO L.ByteString) @@ -160,43 +165,88 @@ retrieveChunks -> ChunkConfig -> EncKey -> Key + -> FilePath -> MeterUpdate - -> (MeterUpdate -> L.ByteString -> IO ()) + -> (Handle -> MeterUpdate -> L.ByteString -> IO ()) -> Annex Bool -retrieveChunks retriever u chunkconfig encryptor basek basep sink +retrieveChunks retriever u chunkconfig encryptor basek dest basep sink | noChunks chunkconfig = -- Optimisation: Try the unchunked key first, to avoid - -- looking in the git-annex branch for chunk counts. - liftIO (retriever (encryptor basek) >>= sink basep >> return True) - `catchNonAsyncAnnex` - const (go =<< chunkKeysOnly u basek) + -- looking in the git-annex branch for chunk counts + -- that are likely not there. + getunchunked `catchNonAsyncAnnex` + const (go =<< chunkKeysOnly u basek) | otherwise = go =<< chunkKeys u chunkconfig basek where - go ls = liftIO $ firstavail ls `catchNonAsync` giveup + go ls = liftIO $ do + currsize <- catchMaybeIO $ + toInteger . fileSize <$> getFileStatus dest + let ls' = maybe ls (setupResume ls) currsize + firstavail currsize ls' `catchNonAsync` giveup giveup e = do warningIO (show e) return False - firstavail [] = return False - firstavail ([]:ls) = firstavail ls - firstavail ((k:ks):ls) = do + firstavail _ [] = return False + firstavail currsize ([]:ls) = firstavail currsize ls + firstavail currsize ((k:ks):ls) = do v <- tryNonAsync $ retriever (encryptor k) case v of Left e | null ls -> giveup e - | otherwise -> firstavail ls + | otherwise -> firstavail currsize ls Right b -> do - sink basep b - let sz = toBytesProcessed $ - fromMaybe 0 $ keyChunkSize k - getrest sz sz ks + let offset = resumeOffset currsize k + let p = maybe basep + (offsetMeterUpdate basep . toBytesProcessed) + offset + bracket (maybe opennew openresume offset) hClose $ \h -> do + sink h p b + let sz = toBytesProcessed $ + fromMaybe 0 $ keyChunkSize k + getrest p h sz sz ks + + getrest _ _ _ _ [] = return True + getrest p h sz bytesprocessed (k:ks) = do + let p' = offsetMeterUpdate p bytesprocessed + sink h p' =<< retriever (encryptor k) + getrest p h sz (addBytesProcessed bytesprocessed sz) ks + + getunchunked = liftIO $ bracket opennew hClose $ \h -> do + retriever (encryptor basek) >>= sink h basep + return True + + opennew = openBinaryFile dest WriteMode - getrest _ _ [] = return True - getrest sz bytesprocessed (k:ks) = do - let p = offsetMeterUpdate basep bytesprocessed - sink p =<< retriever (encryptor k) - getrest sz (addBytesProcessed bytesprocessed sz) ks + -- Open the file and seek to the start point in order to resume. + openresume startpoint = do + -- ReadWriteMode allows seeking; AppendMode does not. + h <- openBinaryFile dest ReadWriteMode + hSeek h AbsoluteSeek startpoint + return h + +{- Can resume when the chunk's offset is at or before the end of + - the dest file. -} +resumeOffset :: Maybe Integer -> Key -> Maybe Integer +resumeOffset Nothing _ = Nothing +resumeOffset currsize k + | offset <= currsize = offset + | otherwise = Nothing + where + offset = chunkKeyOffset k + +{- Drops chunks that are already present in a file, based on its size. + - Keeps any non-chunk keys. + -} +setupResume :: [[Key]] -> Integer -> [[Key]] +setupResume ls currsize = map dropunneeded ls + where + dropunneeded [] = [] + dropunneeded l@(k:_) = case keyChunkSize k of + Just chunksize | chunksize > 0 -> + genericDrop (currsize `div` chunksize) l + _ -> l {- Checks if a key is present in a remote. This requires any one - of the lists of options returned by chunkKeys to all check out @@ -212,7 +262,8 @@ hasKeyChunks hasKeyChunks checker u chunkconfig encryptor basek | noChunks chunkconfig = -- Optimisation: Try the unchunked key first, to avoid - -- looking in the git-annex branch for chunk counts. + -- looking in the git-annex branch for chunk counts + -- that are likely not there. ifM ((Right True ==) <$> checker (encryptor basek)) ( return (Right True) , checklists impossible =<< chunkKeysOnly u basek diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs index ac8917851..66e02da12 100644 --- a/Remote/Helper/ChunkedEncryptable.hs +++ b/Remote/Helper/ChunkedEncryptable.hs @@ -103,9 +103,7 @@ chunkedEncryptableRemote c preparestorer prepareretriever r = encr safely $ prepareretriever k $ safely . go where go (Just retriever) = metered (Just p) k $ \p' -> - bracketIO (openBinaryFile dest WriteMode) hClose $ \h -> - retrieveChunks retriever (uuid r) chunkconfig enck k p' $ - sink h + retrieveChunks retriever (uuid r) chunkconfig enck k dest p' sink go Nothing = return False sink h p' b = do let write = meteredWrite p' h diff --git a/Types/Key.hs b/Types/Key.hs index 3015b1e86..154e813ff 100644 --- a/Types/Key.hs +++ b/Types/Key.hs @@ -13,8 +13,8 @@ module Types.Key ( stubKey, key2file, file2key, - isChunkKey, nonChunkKey, + chunkKeyOffset, prop_idempotent_key_encode, prop_idempotent_key_decode @@ -49,9 +49,6 @@ stubKey = Key , keyChunkNum = Nothing } -isChunkKey :: Key -> Bool -isChunkKey k = isJust (keyChunkSize k) && isJust (keyChunkNum k) - -- Gets the parent of a chunk key. nonChunkKey :: Key -> Key nonChunkKey k = k @@ -59,6 +56,12 @@ nonChunkKey k = k , keyChunkNum = Nothing } +-- Where a chunk key is offset within its parent. +chunkKeyOffset :: Key -> Maybe Integer +chunkKeyOffset k = (*) + <$> keyChunkSize k + <*> (pred <$> keyChunkNum k) + fieldSep :: Char fieldSep = '-' diff --git a/Utility/Metered.hs b/Utility/Metered.hs index bca7f58e7..cc07f9c35 100644 --- a/Utility/Metered.hs +++ b/Utility/Metered.hs @@ -88,7 +88,7 @@ meteredWriteFile meterupdate f b = withBinaryFile f WriteMode $ \h -> {- Applies an offset to a MeterUpdate. This can be useful when - performing a sequence of actions, such as multiple meteredWriteFiles, - - that all update a common meter progressively. + - that all update a common meter progressively. Or when resuming. -} offsetMeterUpdate :: MeterUpdate -> BytesProcessed -> MeterUpdate offsetMeterUpdate base offset = \n -> base (offset `addBytesProcessed` n) diff --git a/debian/changelog b/debian/changelog index d5c000003..c85247b69 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,7 +1,9 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * New chunk= option to chunk files stored in directory remotes. - * The old chunksize= option is deprecated. Do not use for new remotes! + * Partially transferred files are automatically resumed when using + chunked remotes! + * The old chunksize= option is deprecated. Do not use for new remotes. * Legacy code for directory remotes using the old chunksize= option will keep them working, but more slowly than before. * webapp: Automatically install Konqueror integration scripts diff --git a/doc/chunking.mdwn b/doc/chunking.mdwn index 1be1fbef6..d1dce317c 100644 --- a/doc/chunking.mdwn +++ b/doc/chunking.mdwn @@ -4,6 +4,8 @@ chunks that are stored on the remote. This can be useful to work around limitations on the size of files on the remote. +Chunking also allows for resuming interrupted downloads and uploads. + Note that git-annex has to buffer chunks in memory before they are sent to a remote. So, using a large chunk size will make it use more memory. |