-rw-r--r--  Remote/Helper/Chunked.hs             | 97
-rw-r--r--  Remote/Helper/ChunkedEncryptable.hs  |  4
-rw-r--r--  Types/Key.hs                         | 11
-rw-r--r--  Utility/Metered.hs                   |  2
-rw-r--r--  debian/changelog                     |  4
-rw-r--r--  doc/chunking.mdwn                    |  2
6 files changed, 88 insertions(+), 32 deletions(-)
diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs
index 3415c2df6..9ba6d9cbd 100644
--- a/Remote/Helper/Chunked.hs
+++ b/Remote/Helper/Chunked.hs
@@ -1,6 +1,6 @@
 {- git-annex chunked remotes
  -
- - Copyright 2012-2014 Joey Hess <joey@kitenet.net>
+ - Copyright 2014 Joey Hess <joey@kitenet.net>
  -
  - Licensed under the GNU GPL version 3 or higher.
  -}
@@ -27,6 +27,7 @@ import Annex.Exception
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
+import Control.Exception
data ChunkConfig
= NoChunks
@@ -147,12 +148,16 @@ removeChunks remover u chunkconfig encryptor k = do
  -
  - When the remote is chunked, tries each of the options returned by
  - chunkKeys until it finds one where the retriever successfully
- - gets the first key in the list. The content of that key, and any
+ - gets the first chunked key. The content of that key, and any
  - other chunks in the list is fed to the sink.
  -
  - If retrival of one of the subsequent chunks throws an exception,
  - gives up and returns False. Note that partial data may have been
  - written to the sink in this case.
+ -
+ - Resuming is supported when using chunks. When the destination file
+ - already exists, it skips to the next chunked key that would be needed
+ - to resume.
  -}
retrieveChunks
:: (Key -> IO L.ByteString)
@@ -160,43 +165,88 @@ retrieveChunks
-> ChunkConfig
-> EncKey
-> Key
+ -> FilePath
-> MeterUpdate
- -> (MeterUpdate -> L.ByteString -> IO ())
+ -> (Handle -> MeterUpdate -> L.ByteString -> IO ())
-> Annex Bool
-retrieveChunks retriever u chunkconfig encryptor basek basep sink
+retrieveChunks retriever u chunkconfig encryptor basek dest basep sink
| noChunks chunkconfig =
-- Optimisation: Try the unchunked key first, to avoid
- -- looking in the git-annex branch for chunk counts.
- liftIO (retriever (encryptor basek) >>= sink basep >> return True)
- `catchNonAsyncAnnex`
- const (go =<< chunkKeysOnly u basek)
+ -- looking in the git-annex branch for chunk counts
+ -- that are likely not there.
+ getunchunked `catchNonAsyncAnnex`
+ const (go =<< chunkKeysOnly u basek)
| otherwise = go =<< chunkKeys u chunkconfig basek
where
- go ls = liftIO $ firstavail ls `catchNonAsync` giveup
+ go ls = liftIO $ do
+ currsize <- catchMaybeIO $
+ toInteger . fileSize <$> getFileStatus dest
+ let ls' = maybe ls (setupResume ls) currsize
+ firstavail currsize ls' `catchNonAsync` giveup
giveup e = do
warningIO (show e)
return False
- firstavail [] = return False
- firstavail ([]:ls) = firstavail ls
- firstavail ((k:ks):ls) = do
+ firstavail _ [] = return False
+ firstavail currsize ([]:ls) = firstavail currsize ls
+ firstavail currsize ((k:ks):ls) = do
v <- tryNonAsync $ retriever (encryptor k)
case v of
Left e
| null ls -> giveup e
- | otherwise -> firstavail ls
+ | otherwise -> firstavail currsize ls
Right b -> do
- sink basep b
- let sz = toBytesProcessed $
- fromMaybe 0 $ keyChunkSize k
- getrest sz sz ks
+ let offset = resumeOffset currsize k
+ let p = maybe basep
+ (offsetMeterUpdate basep . toBytesProcessed)
+ offset
+ bracket (maybe opennew openresume offset) hClose $ \h -> do
+ sink h p b
+ let sz = toBytesProcessed $
+ fromMaybe 0 $ keyChunkSize k
+ getrest p h sz sz ks
+
+ getrest _ _ _ _ [] = return True
+ getrest p h sz bytesprocessed (k:ks) = do
+ let p' = offsetMeterUpdate p bytesprocessed
+ sink h p' =<< retriever (encryptor k)
+ getrest p h sz (addBytesProcessed bytesprocessed sz) ks
+
+ getunchunked = liftIO $ bracket opennew hClose $ \h -> do
+ retriever (encryptor basek) >>= sink h basep
+ return True
+
+ opennew = openBinaryFile dest WriteMode
- getrest _ _ [] = return True
- getrest sz bytesprocessed (k:ks) = do
- let p = offsetMeterUpdate basep bytesprocessed
- sink p =<< retriever (encryptor k)
- getrest sz (addBytesProcessed bytesprocessed sz) ks
+ -- Open the file and seek to the start point in order to resume.
+ openresume startpoint = do
+ -- ReadWriteMode allows seeking; AppendMode does not.
+ h <- openBinaryFile dest ReadWriteMode
+ hSeek h AbsoluteSeek startpoint
+ return h
+
+{- Can resume when the chunk's offset is at or before the end of
+ - the dest file. -}
+resumeOffset :: Maybe Integer -> Key -> Maybe Integer
+resumeOffset Nothing _ = Nothing
+resumeOffset currsize k
+ | offset <= currsize = offset
+ | otherwise = Nothing
+ where
+ offset = chunkKeyOffset k
+
+{- Drops chunks that are already present in a file, based on its size.
+ - Keeps any non-chunk keys.
+ -}
+setupResume :: [[Key]] -> Integer -> [[Key]]
+setupResume ls currsize = map dropunneeded ls
+ where
+ dropunneeded [] = []
+ dropunneeded l@(k:_) = case keyChunkSize k of
+ Just chunksize | chunksize > 0 ->
+ genericDrop (currsize `div` chunksize) l
+ _ -> l
 {- Checks if a key is present in a remote. This requires any one
  - of the lists of options returned by chunkKeys to all check out
@@ -212,7 +262,8 @@ hasKeyChunks
hasKeyChunks checker u chunkconfig encryptor basek
| noChunks chunkconfig =
-- Optimisation: Try the unchunked key first, to avoid
- -- looking in the git-annex branch for chunk counts.
+ -- looking in the git-annex branch for chunk counts
+ -- that are likely not there.
ifM ((Right True ==) <$> checker (encryptor basek))
( return (Right True)
, checklists impossible =<< chunkKeysOnly u basek
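
The resume behaviour added above comes down to setupResume and resumeOffset. Below is a minimal standalone sketch of that selection logic, using a simplified stand-in for git-annex's Key type (SimpleKey, offsetOf and dropPresent are illustrative names, not the real API):

    import Data.List (genericDrop)
    import Data.Maybe (listToMaybe)

    -- Simplified stand-in for a chunk key: just its chunk size and
    -- 1-based chunk number.
    data SimpleKey = SimpleKey
        { skChunkSize :: Maybe Integer
        , skChunkNum  :: Maybe Integer
        } deriving (Show)

    -- Mirrors chunkKeyOffset: chunk N of size S starts at (N-1)*S bytes.
    offsetOf :: SimpleKey -> Maybe Integer
    offsetOf k = (*) <$> skChunkSize k <*> (pred <$> skChunkNum k)

    -- Mirrors setupResume's dropunneeded: given the current size of the
    -- partially written destination file, drop the chunks that are
    -- already fully present in it.
    dropPresent :: Integer -> [SimpleKey] -> [SimpleKey]
    dropPresent _ [] = []
    dropPresent currsize l@(k:_) = case skChunkSize k of
        Just chunksize | chunksize > 0 ->
            genericDrop (currsize `div` chunksize) l
        _ -> l

    main :: IO ()
    main = do
        let chunks = [ SimpleKey (Just 1000000) (Just n) | n <- [1..5] ]
        -- With 2500000 bytes already on disk, the first two chunks are
        -- skipped and retrieval resumes at chunk 3, at offset 2000000.
        let remaining = dropPresent 2500000 chunks
        print (map skChunkNum remaining)           -- [Just 3,Just 4,Just 5]
        print (offsetOf =<< listToMaybe remaining) -- Just 2000000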
diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs
index ac8917851..66e02da12 100644
--- a/Remote/Helper/ChunkedEncryptable.hs
+++ b/Remote/Helper/ChunkedEncryptable.hs
@@ -103,9 +103,7 @@ chunkedEncryptableRemote c preparestorer prepareretriever r = encr
safely $ prepareretriever k $ safely . go
where
go (Just retriever) = metered (Just p) k $ \p' ->
- bracketIO (openBinaryFile dest WriteMode) hClose $ \h ->
- retrieveChunks retriever (uuid r) chunkconfig enck k p' $
- sink h
+ retrieveChunks retriever (uuid r) chunkconfig enck k dest p' sink
go Nothing = return False
sink h p' b = do
let write = meteredWrite p' h
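
With the file handling moved into retrieveChunks, the sink passed in now has the shape Handle -> MeterUpdate -> L.ByteString -> IO (). A rough sketch of such a sink, with a simplified meter type (the real git-annex sink also decrypts and writes via meteredWrite):

    import qualified Data.ByteString.Lazy as L
    import System.IO (Handle)

    -- Simplified stand-in for git-annex's MeterUpdate callback.
    type Meter = Integer -> IO ()

    -- Writes one retrieved chunk to the handle that retrieveChunks opened
    -- (and, when resuming, already seeked), then reports the bytes written.
    plainSink :: Handle -> Meter -> L.ByteString -> IO ()
    plainSink h meterupdate b = do
        L.hPut h b
        meterupdate (fromIntegral (L.length b))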
diff --git a/Types/Key.hs b/Types/Key.hs
index 3015b1e86..154e813ff 100644
--- a/Types/Key.hs
+++ b/Types/Key.hs
@@ -13,8 +13,8 @@ module Types.Key (
stubKey,
key2file,
file2key,
- isChunkKey,
nonChunkKey,
+ chunkKeyOffset,
prop_idempotent_key_encode,
prop_idempotent_key_decode
@@ -49,9 +49,6 @@ stubKey = Key
, keyChunkNum = Nothing
}
-isChunkKey :: Key -> Bool
-isChunkKey k = isJust (keyChunkSize k) && isJust (keyChunkNum k)
-
-- Gets the parent of a chunk key.
nonChunkKey :: Key -> Key
nonChunkKey k = k
@@ -59,6 +56,12 @@ nonChunkKey k = k
, keyChunkNum = Nothing
}
+-- Where a chunk key is offset within its parent.
+chunkKeyOffset :: Key -> Maybe Integer
+chunkKeyOffset k = (*)
+ <$> keyChunkSize k
+ <*> (pred <$> keyChunkNum k)
+
fieldSep :: Char
fieldSep = '-'
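
chunkKeyOffset works in the Maybe applicative, so a key with no chunk size or chunk number simply yields Nothing. A quick worked example of the arithmetic, with plain Maybe Integer arguments instead of Key fields:

    -- Mirrors chunkKeyOffset: chunk N of size S begins (N-1)*S bytes into
    -- the parent key's content.
    chunkOffset :: Maybe Integer -> Maybe Integer -> Maybe Integer
    chunkOffset size num = (*) <$> size <*> (pred <$> num)

    -- chunkOffset (Just 1048576) (Just 3) == Just 2097152
    -- chunkOffset Nothing        (Just 3) == Nothing   (not a chunk key)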
diff --git a/Utility/Metered.hs b/Utility/Metered.hs
index bca7f58e7..cc07f9c35 100644
--- a/Utility/Metered.hs
+++ b/Utility/Metered.hs
@@ -88,7 +88,7 @@ meteredWriteFile meterupdate f b = withBinaryFile f WriteMode $ \h ->
 {- Applies an offset to a MeterUpdate. This can be useful when
  - performing a sequence of actions, such as multiple meteredWriteFiles,
- - that all update a common meter progressively.
+ - that all update a common meter progressively. Or when resuming.
  -}
offsetMeterUpdate :: MeterUpdate -> BytesProcessed -> MeterUpdate
offsetMeterUpdate base offset = \n -> base (offset `addBytesProcessed` n)
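
On resume, the overall progress meter should start at the bytes already on disk, so each chunk's progress callbacks get shifted by that starting offset (this is what retrieveChunks does with offsetMeterUpdate above). A small sketch of the idea with a plain Integer meter instead of the BytesProcessed newtype:

    type Meter = Integer -> IO ()

    -- Same idea as offsetMeterUpdate, with the newtype stripped away.
    offsetMeter :: Meter -> Integer -> Meter
    offsetMeter base offset = \n -> base (offset + n)

    -- If 2000000 bytes were already downloaded, a chunk reporting 500000
    -- bytes of progress shows up as 2500000 on the shared meter:
    --   let resumed = offsetMeter reportProgress 2000000  -- reportProgress: any Meter
    --   in resumed 500000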
diff --git a/debian/changelog b/debian/changelog
index d5c000003..c85247b69 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,7 +1,9 @@
git-annex (5.20140718) UNRELEASED; urgency=medium
* New chunk= option to chunk files stored in directory remotes.
- * The old chunksize= option is deprecated. Do not use for new remotes!
+ * Partially transferred files are automatically resumed when using
+ chunked remotes!
+ * The old chunksize= option is deprecated. Do not use for new remotes.
* Legacy code for directory remotes using the old chunksize= option
will keep them working, but more slowly than before.
* webapp: Automatically install Konqueror integration scripts
diff --git a/doc/chunking.mdwn b/doc/chunking.mdwn
index 1be1fbef6..d1dce317c 100644
--- a/doc/chunking.mdwn
+++ b/doc/chunking.mdwn
@@ -4,6 +4,8 @@ chunks that are stored on the remote.
This can be useful to work around limitations on the size of files
on the remote.
+Chunking also allows for resuming interrupted downloads and uploads.
+
Note that git-annex has to buffer chunks in memory before they are sent to
a remote. So, using a large chunk size will make it use more memory.