author    Joey Hess <joey@kitenet.net> 2014-08-01 18:00:47 -0400
committer Joey Hess <joey@kitenet.net> 2014-08-01 18:00:47 -0400
commit ddbf5df3c9940473663a6e562f8ee3583867046e (patch)
tree 40dc81d0c2693b5fd5fc3e5b9cd8df17e0505ec5
parent 154cb13180fbe877d2030d83a415b30150ac7298 (diff)
parent d0a8e3d6217f2924b864393d425b6d7582370d07 (diff)
Merge branch 'newchunks'
I am happy enough with this to make it live!
-rw-r--r--  Annex/Content.hs | 6
-rw-r--r--  Annex/Exception.hs | 15
-rw-r--r--  Assistant/Threads/Watcher.hs | 2
-rw-r--r--  Backend.hs | 17
-rw-r--r--  Backend/Hash.hs | 39
-rw-r--r--  Backend/URL.hs | 3
-rw-r--r--  Backend/WORM.hs | 1
-rw-r--r--  CmdLine/GitAnnex.hs | 6
-rw-r--r--  Command/FuzzTest.hs | 2
-rw-r--r--  Command/Test.hs | 2
-rw-r--r--  Command/TestRemote.hs | 196
-rw-r--r--  Crypto.hs | 20
-rw-r--r--  Locations.hs | 5
-rw-r--r--  Logs/Chunk.hs | 27
-rw-r--r--  Logs/Chunk/Pure.hs | 24
-rw-r--r--  Remote/Directory.hs | 212
-rw-r--r--  Remote/Directory/LegacyChunked.hs | 112
-rw-r--r--  Remote/External.hs | 60
-rw-r--r--  Remote/Glacier.hs | 3
-rw-r--r--  Remote/Helper/Chunked.hs | 395
-rw-r--r--  Remote/Helper/Chunked/Legacy.hs | 14
-rw-r--r--  Remote/Helper/ChunkedEncryptable.hs | 200
-rw-r--r--  Remote/Helper/Encryptable.hs | 59
-rw-r--r--  Remote/WebDAV.hs | 49
-rw-r--r--  Types/Backend.hs | 7
-rw-r--r--  Types/Command.hs | 2
-rw-r--r--  Types/Key.hs | 15
-rw-r--r--  Types/Remote.hs | 6
-rw-r--r--  Types/StoreRetrieve.hs | 37
-rw-r--r--  Utility/Gpg.hs | 36
-rw-r--r--  Utility/Metered.hs | 19
-rw-r--r--  Utility/Process.hs | 1
-rw-r--r--  Utility/Tmp.hs | 13
-rw-r--r--  debian/changelog | 9
-rw-r--r--  doc/chunking.mdwn | 31
-rw-r--r--  doc/design/assistant/chunks.mdwn | 9
-rw-r--r--  doc/design/external_special_remote_protocol.mdwn | 9
-rw-r--r--  doc/git-annex.mdwn | 31
-rw-r--r--  doc/internals/hashing.mdwn | 5
-rw-r--r--  doc/special_remotes/directory.mdwn | 12
-rwxr-xr-x  doc/special_remotes/external/example.sh | 15
-rw-r--r--  doc/special_remotes/webdav.mdwn | 12
-rw-r--r--  doc/tips/using_box.com_as_a_special_remote.mdwn | 6
-rw-r--r--  git-annex.cabal | 4
44 files changed, 1358 insertions, 390 deletions
diff --git a/Annex/Content.hs b/Annex/Content.hs
index 8ad3d5e65..eb84f2fe9 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -16,6 +16,7 @@ module Annex.Content (
getViaTmpChecked,
getViaTmpUnchecked,
prepGetViaTmpChecked,
+ prepTmp,
withTmp,
checkDiskSpace,
moveAnnex,
@@ -264,7 +265,10 @@ prepTmp key = do
createAnnexDirectory (parentDir tmp)
return tmp
-{- Creates a temp file, runs an action on it, and cleans up the temp file. -}
+{- Creates a temp file for a key, runs an action on it, and cleans up
+ - the temp file. If the action throws an exception, the temp file is
+ - left behind, which allows for resuming.
+ -}
withTmp :: Key -> (FilePath -> Annex a) -> Annex a
withTmp key action = do
tmp <- prepTmp key
diff --git a/Annex/Exception.hs b/Annex/Exception.hs
index 41a9ed921..5ecbd28a0 100644
--- a/Annex/Exception.hs
+++ b/Annex/Exception.hs
@@ -5,12 +5,13 @@
- AnnexState are retained. This works because the Annex monad
- internally stores the AnnexState in a MVar.
-
- - Copyright 2011-2013 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE PackageImports #-}
+{-# LANGUAGE ScopedTypeVariables #-}
module Annex.Exception (
bracketIO,
@@ -19,6 +20,8 @@ module Annex.Exception (
tryAnnexIO,
throwAnnex,
catchAnnex,
+ catchNonAsyncAnnex,
+ tryNonAsyncAnnex,
) where
import qualified Control.Monad.Catch as M
@@ -48,3 +51,13 @@ throwAnnex = M.throwM
{- catch in the Annex monad -}
catchAnnex :: Exception e => Annex a -> (e -> Annex a) -> Annex a
catchAnnex = M.catch
+
+{- catches all exceptions except for async exceptions -}
+catchNonAsyncAnnex :: Annex a -> (SomeException -> Annex a) -> Annex a
+catchNonAsyncAnnex a onerr = a `M.catches`
+ [ M.Handler (\ (e :: AsyncException) -> throwAnnex e)
+ , M.Handler (\ (e :: SomeException) -> onerr e)
+ ]
+
+tryNonAsyncAnnex :: Annex a -> Annex (Either SomeException a)
+tryNonAsyncAnnex a = (Right <$> a) `catchNonAsyncAnnex` (return . Left)
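
For illustration, a minimal sketch of how the new helper is meant to be used (the `upload` action is a stand-in, not part of the patch): async exceptions such as user interrupts still propagate, while everything else becomes a warning and a failure result.

    -- Hedged sketch; 'upload' is any Annex action that may throw.
    storeSafely :: Annex Bool
    storeSafely = upload `catchNonAsyncAnnex` \e -> do
        warning (show e)
        return False
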
diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs
index 0ed1bd22f..91e0fc619 100644
--- a/Assistant/Threads/Watcher.hs
+++ b/Assistant/Threads/Watcher.hs
@@ -184,7 +184,7 @@ runHandler :: Handler -> FilePath -> Maybe FileStatus -> Assistant ()
runHandler handler file filestatus = void $ do
r <- tryIO <~> handler (normalize file) filestatus
case r of
- Left e -> liftIO $ print e
+ Left e -> liftIO $ warningIO $ show e
Right Nothing -> noop
Right (Just change) -> do
-- Just in case the commit thread is not
diff --git a/Backend.hs b/Backend.hs
index dded0d005..0fcaaa7ed 100644
--- a/Backend.hs
+++ b/Backend.hs
@@ -14,7 +14,8 @@ module Backend (
isAnnexLink,
chooseBackend,
lookupBackendName,
- maybeLookupBackendName
+ maybeLookupBackendName,
+ isStableKey,
) where
import Common.Annex
@@ -32,6 +33,8 @@ import qualified Backend.Hash
import qualified Backend.WORM
import qualified Backend.URL
+import qualified Data.Map as M
+
list :: [Backend]
list = Backend.Hash.backends ++ Backend.WORM.backends ++ Backend.URL.backends
@@ -116,7 +119,13 @@ lookupBackendName :: String -> Backend
lookupBackendName s = fromMaybe unknown $ maybeLookupBackendName s
where
unknown = error $ "unknown backend " ++ s
+
maybeLookupBackendName :: String -> Maybe Backend
-maybeLookupBackendName s = headMaybe matches
- where
- matches = filter (\b -> s == B.name b) list
+maybeLookupBackendName s = M.lookup s nameMap
+
+nameMap :: M.Map String Backend
+nameMap = M.fromList $ zip (map B.name list) list
+
+isStableKey :: Key -> Bool
+isStableKey k = maybe False (`B.isStableKey` k)
+ (maybeLookupBackendName (keyBackendName k))
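
Two things are worth noting here: backend lookup now goes through a Map instead of a linear scan, and the new isStableKey predicate is what later gates chunking (see storeChunks below), since only keys whose content can never change are safe to split into chunks. A hedged sketch of that gating logic, using a hypothetical helper name:

    -- shouldChunk is illustrative, not part of the patch.
    shouldChunk :: ChunkConfig -> Key -> Bool
    shouldChunk (UnpaddedChunks _) k = isStableKey k
    shouldChunk _ _ = False
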
diff --git a/Backend/Hash.hs b/Backend/Hash.hs
index 3ff496271..62d0a0fca 100644
--- a/Backend/Hash.hs
+++ b/Backend/Hash.hs
@@ -7,7 +7,10 @@
{-# LANGUAGE CPP #-}
-module Backend.Hash (backends) where
+module Backend.Hash (
+ backends,
+ testKeyBackend,
+) where
import Common.Annex
import qualified Annex
@@ -36,24 +39,23 @@ hashes = concat
{- The SHA256E backend is the default, so genBackendE comes first. -}
backends :: [Backend]
-backends = catMaybes $ map genBackendE hashes ++ map genBackend hashes
+backends = map genBackendE hashes ++ map genBackend hashes
-genBackend :: Hash -> Maybe Backend
-genBackend hash = Just Backend
+genBackend :: Hash -> Backend
+genBackend hash = Backend
{ name = hashName hash
, getKey = keyValue hash
, fsckKey = Just $ checkKeyChecksum hash
, canUpgradeKey = Just needsUpgrade
, fastMigrate = Just trivialMigrate
+ , isStableKey = const True
}
-genBackendE :: Hash -> Maybe Backend
-genBackendE hash = do
- b <- genBackend hash
- return $ b
- { name = hashNameE hash
- , getKey = keyValueE hash
- }
+genBackendE :: Hash -> Backend
+genBackendE hash = (genBackend hash)
+ { name = hashNameE hash
+ , getKey = keyValueE hash
+ }
hashName :: Hash -> String
hashName (SHAHash size) = "SHA" ++ show size
@@ -175,3 +177,18 @@ skeinHasher hashsize
| hashsize == 512 = show . skein512
#endif
| otherwise = error $ "unsupported skein size " ++ show hashsize
+
+{- A variant of the SHA256E backend, for testing that needs special keys
+ - that cannot collide with legitimate keys in the repository.
+ -
+ - This is accomplished by appending a special extension to the key,
+ - that is not one that selectExtension would select (due to being too
+ - long).
+ -}
+testKeyBackend :: Backend
+testKeyBackend =
+ let b = genBackendE (SHAHash 256)
+ in b { getKey = (fmap addE) <$$> getKey b }
+ where
+ addE k = k { keyName = keyName k ++ longext }
+ longext = ".this-is-a-test-key"
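
Serialized, such a test key looks roughly like the following (hash shortened for illustration); the overlong extension is what guarantees it can never collide with a key selectExtension would produce:

    SHA256E-s1024--86fb269d190d2c85f6e0468ce...e1.this-is-a-test-key
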
diff --git a/Backend/URL.hs b/Backend/URL.hs
index 4233c56bc..2c2988ac0 100644
--- a/Backend/URL.hs
+++ b/Backend/URL.hs
@@ -25,6 +25,9 @@ backend = Backend
, fsckKey = Nothing
, canUpgradeKey = Nothing
, fastMigrate = Nothing
+ -- The content of an url can change at any time, so URL keys are
+ -- not stable.
+ , isStableKey = const False
}
{- Every unique url has a corresponding key. -}
diff --git a/Backend/WORM.hs b/Backend/WORM.hs
index fdeea6f89..c972602ad 100644
--- a/Backend/WORM.hs
+++ b/Backend/WORM.hs
@@ -23,6 +23,7 @@ backend = Backend
, fsckKey = Nothing
, canUpgradeKey = Nothing
, fastMigrate = Nothing
+ , isStableKey = const True
}
{- The key includes the file size, modification time, and the
diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs
index 4c9377df9..80a784dd7 100644
--- a/CmdLine/GitAnnex.hs
+++ b/CmdLine/GitAnnex.hs
@@ -96,9 +96,10 @@ import qualified Command.XMPPGit
#endif
import qualified Command.RemoteDaemon
#endif
-import qualified Command.Test
#ifdef WITH_TESTSUITE
+import qualified Command.Test
import qualified Command.FuzzTest
+import qualified Command.TestRemote
#endif
#ifdef WITH_EKG
import System.Remote.Monitoring
@@ -187,9 +188,10 @@ cmds = concat
#endif
, Command.RemoteDaemon.def
#endif
- , Command.Test.def
#ifdef WITH_TESTSUITE
+ , Command.Test.def
, Command.FuzzTest.def
+ , Command.TestRemote.def
#endif
]
diff --git a/Command/FuzzTest.hs b/Command/FuzzTest.hs
index 08103edc8..d673541fb 100644
--- a/Command/FuzzTest.hs
+++ b/Command/FuzzTest.hs
@@ -22,7 +22,7 @@ import Test.QuickCheck
import Control.Concurrent
def :: [Command]
-def = [ notBareRepo $ command "fuzztest" paramNothing seek SectionPlumbing
+def = [ notBareRepo $ command "fuzztest" paramNothing seek SectionTesting
"generates fuzz test files"]
seek :: CommandSeek
diff --git a/Command/Test.hs b/Command/Test.hs
index ee7220142..08e9d1b6e 100644
--- a/Command/Test.hs
+++ b/Command/Test.hs
@@ -13,7 +13,7 @@ import Messages
def :: [Command]
def = [ noRepo startIO $ dontCheck repoExists $
- command "test" paramNothing seek SectionPlumbing
+ command "test" paramNothing seek SectionTesting
"run built-in test suite"]
seek :: CommandSeek
diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs
new file mode 100644
index 000000000..29a2e809c
--- /dev/null
+++ b/Command/TestRemote.hs
@@ -0,0 +1,196 @@
+{- git-annex command
+ -
+ - Copyright 2014 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Command.TestRemote where
+
+import Common
+import Command
+import qualified Annex
+import qualified Remote
+import qualified Types.Remote as Remote
+import Types
+import Types.Key (key2file, keyBackendName, keySize)
+import Types.Backend (getKey, fsckKey)
+import Types.KeySource
+import Annex.Content
+import Backend
+import qualified Backend.Hash
+import Utility.Tmp
+import Utility.Metered
+import Utility.DataUnits
+import Utility.CopyFile
+import Messages
+import Types.Messages
+import Remote.Helper.Chunked
+import Locations
+
+import Test.Tasty
+import Test.Tasty.Runners
+import Test.Tasty.HUnit
+import Control.Exception
+import "crypto-api" Crypto.Random
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as L
+import qualified Data.Map as M
+
+def :: [Command]
+def = [ withOptions [sizeOption] $
+ command "testremote" paramRemote seek SectionTesting
+ "test transfers to/from a remote"]
+
+sizeOption :: Option
+sizeOption = fieldOption [] "size" paramSize "base key size (default 1MiB)"
+
+seek :: CommandSeek
+seek ps = do
+ basesz <- fromInteger . fromMaybe (1024 * 1024)
+ <$> getOptionField sizeOption (pure . getsize)
+ withWords (start basesz) ps
+ where
+ getsize v = v >>= readSize dataUnits
+
+start :: Int -> [String] -> CommandStart
+start basesz ws = do
+ let name = unwords ws
+ showStart "testremote" name
+ r <- either error id <$> Remote.byName' name
+ showSideAction "generating test keys"
+ ks <- mapM randKey (keySizes basesz)
+ rs <- catMaybes <$> mapM (adjustChunkSize r) (chunkSizes basesz)
+ rs' <- concat <$> mapM encryptionVariants rs
+ next $ perform rs' ks
+
+perform :: [Remote] -> [Key] -> CommandPerform
+perform rs ks = do
+ st <- Annex.getState id
+ let tests = testGroup "Remote Tests" $
+ [ testGroup (desc r k) (test st r k) | k <- ks, r <- rs ]
+ ok <- case tryIngredients [consoleTestReporter] mempty tests of
+ Nothing -> error "No tests found!?"
+ Just act -> liftIO act
+ next $ cleanup rs ks ok
+ where
+ desc r' k = intercalate "; " $ map unwords
+ [ [ "key size", show (keySize k) ]
+ , [ show (chunkConfig (Remote.config r')) ]
+ , ["encryption", fromMaybe "none" (M.lookup "encryption" (Remote.config r'))]
+ ]
+
+adjustChunkSize :: Remote -> Int -> Annex (Maybe Remote)
+adjustChunkSize r chunksize = adjustRemoteConfig r
+ (M.insert "chunk" (show chunksize))
+
+-- Variants of a remote with no encryption, and with simple shared
+-- encryption. Gpg key based encryption is not tested.
+encryptionVariants :: Remote -> Annex [Remote]
+encryptionVariants r = do
+ noenc <- adjustRemoteConfig r (M.insert "encryption" "none")
+ sharedenc <- adjustRemoteConfig r $
+ M.insert "encryption" "shared" .
+ M.insert "highRandomQuality" "false"
+ return $ catMaybes [noenc, sharedenc]
+
+-- Regenerate a remote with a modified config.
+adjustRemoteConfig :: Remote -> (Remote.RemoteConfig -> Remote.RemoteConfig) -> Annex (Maybe Remote)
+adjustRemoteConfig r adjustconfig = Remote.generate (Remote.remotetype r)
+ (Remote.repo r)
+ (Remote.uuid r)
+ (adjustconfig (Remote.config r))
+ (Remote.gitconfig r)
+
+test :: Annex.AnnexState -> Remote -> Key -> [TestTree]
+test st r k =
+ [ check "removeKey when not present" remove
+ , present False
+ , check "storeKey" store
+ , present True
+ , check "storeKey when already present" store
+ , present True
+ , check "retrieveKeyFile" $ do
+ removeAnnex k
+ get
+ , check "fsck downloaded object" fsck
+ , check "retrieveKeyFile resume from 33%" $ do
+ loc <- Annex.calcRepo (gitAnnexLocation k)
+ tmp <- prepTmp k
+ partial <- liftIO $ bracket (openBinaryFile loc ReadMode) hClose $ \h -> do
+ sz <- hFileSize h
+ L.hGet h $ fromInteger $ sz `div` 3
+ liftIO $ L.writeFile tmp partial
+ removeAnnex k
+ get
+ , check "fsck downloaded object" fsck
+ , check "retrieveKeyFile resume from 0" $ do
+ tmp <- prepTmp k
+ liftIO $ writeFile tmp ""
+ removeAnnex k
+ get
+ , check "fsck downloaded object" fsck
+ , check "retrieveKeyFile resume from end" $ do
+ loc <- Annex.calcRepo (gitAnnexLocation k)
+ tmp <- prepTmp k
+ void $ liftIO $ copyFileExternal loc tmp
+ removeAnnex k
+ get
+ , check "fsck downloaded object" fsck
+ , check "removeKey when present" remove
+ , present False
+ ]
+ where
+ check desc a = testCase desc $
+ Annex.eval st (Annex.setOutput QuietOutput >> a) @? "failed"
+ present b = check ("present " ++ show b) $
+ (== Right b) <$> Remote.hasKey r k
+ fsck = case maybeLookupBackendName (keyBackendName k) of
+ Nothing -> return True
+ Just b -> case fsckKey b of
+ Nothing -> return True
+ Just fscker -> fscker k (key2file k)
+ get = getViaTmp k $ \dest ->
+ Remote.retrieveKeyFile r k Nothing dest nullMeterUpdate
+ store = Remote.storeKey r k Nothing nullMeterUpdate
+ remove = Remote.removeKey r k
+
+cleanup :: [Remote] -> [Key] -> Bool -> CommandCleanup
+cleanup rs ks ok = do
+ forM_ rs $ \r -> forM_ ks (Remote.removeKey r)
+ forM_ ks removeAnnex
+ return ok
+
+chunkSizes :: Int -> [Int]
+chunkSizes base =
+ [ 0 -- no chunking
+ , base `div` 100
+ , base `div` 1000
+ , base
+ ]
+
+keySizes :: Int -> [Int]
+keySizes base = filter (>= 0)
+ [ 0 -- empty key is a special case when chunking
+ , base
+ , base + 1
+ , base - 1
+ , base * 2
+ ]
+
+randKey :: Int -> Annex Key
+randKey sz = withTmpFile "randkey" $ \f h -> do
+ gen <- liftIO (newGenIO :: IO SystemRandom)
+ case genBytes sz gen of
+ Left e -> error $ "failed to generate random key: " ++ show e
+ Right (rand, _) -> liftIO $ B.hPut h rand
+ liftIO $ hClose h
+ let ks = KeySource
+ { keyFilename = f
+ , contentLocation = f
+ , inodeCache = Nothing
+ }
+ k <- fromMaybe (error "failed to generate random key")
+ <$> getKey Backend.Hash.testKeyBackend ks
+ moveAnnex k f
+ return k
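
Usage is straightforward: pass the name of a configured remote, optionally overriding the base key size (the remote name below is illustrative):

    git annex testremote mydirremote
    git annex testremote mydirremote --size=100KiB

For each generated key size, the remote is exercised unchunked and with several chunk sizes, both without encryption and with shared encryption, covering store, retrieve (including resumes from 0%, 33%, and the end), fsck, and remove.
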
diff --git a/Crypto.hs b/Crypto.hs
index 0bfa81db2..dcefc2959 100644
--- a/Crypto.hs
+++ b/Crypto.hs
@@ -3,16 +3,18 @@
- Currently using gpg; could later be modified to support different
 - crypto backends if necessary.
-
- - Copyright 2011-2012 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE Rank2Types #-}
module Crypto (
Cipher,
KeyIds(..),
+ EncKey,
StorableCipher(..),
genEncryptedCipher,
genSharedCipher,
@@ -34,6 +36,8 @@ import qualified Data.ByteString.Lazy as L
import Data.ByteString.Lazy.UTF8 (fromString)
import Control.Applicative
import qualified Data.Map as M
+import Control.Monad.IO.Class
+import Control.Monad.Catch (MonadMask)
import Common.Annex
import qualified Utility.Gpg as Gpg
@@ -138,17 +142,19 @@ decryptCipher (EncryptedCipher t variant _) =
Hybrid -> Cipher
PubKey -> MacOnlyCipher
+type EncKey = Key -> Key
+
{- Generates an encrypted form of a Key. The encryption does not need to be
- reversable, nor does it need to be the same type of encryption used
- on content. It does need to be repeatable. -}
-encryptKey :: Mac -> Cipher -> Key -> Key
+encryptKey :: Mac -> Cipher -> EncKey
encryptKey mac c k = stubKey
{ keyName = macWithCipher mac c (key2file k)
, keyBackendName = "GPG" ++ showMac mac
}
type Feeder = Handle -> IO ()
-type Reader a = Handle -> IO a
+type Reader m a = Handle -> m a
feedFile :: FilePath -> Feeder
feedFile f h = L.hPut h =<< L.readFile f
@@ -156,8 +162,8 @@ feedFile f h = L.hPut h =<< L.readFile f
feedBytes :: L.ByteString -> Feeder
feedBytes = flip L.hPut
-readBytes :: (L.ByteString -> IO a) -> Reader a
-readBytes a h = L.hGetContents h >>= a
+readBytes :: (MonadIO m) => (L.ByteString -> m a) -> Reader m a
+readBytes a h = liftIO (L.hGetContents h) >>= a
{- Runs a Feeder action, that generates content that is symmetrically
- encrypted with the Cipher (unless it is empty, in which case
@@ -165,7 +171,7 @@ readBytes a h = L.hGetContents h >>= a
- read by the Reader action. Note: For public-key encryption,
- recipients MUST be included in 'params' (for instance using
- 'getGpgEncParams'). -}
-encrypt :: [CommandParam] -> Cipher -> Feeder -> Reader a -> IO a
+encrypt :: (MonadIO m, MonadMask m) => [CommandParam] -> Cipher -> Feeder -> Reader m a -> m a
encrypt params cipher = case cipher of
Cipher{} -> Gpg.feedRead (params ++ Gpg.stdEncryptionParams True) $
cipherPassphrase cipher
@@ -174,7 +180,7 @@ encrypt params cipher = case cipher of
{- Runs a Feeder action, that generates content that is decrypted with the
- Cipher (or using a private key if the Cipher is empty), and read by the
- Reader action. -}
-decrypt :: Cipher -> Feeder -> Reader a -> IO a
+decrypt :: (MonadIO m, MonadMask m) => Cipher -> Feeder -> Reader m a -> m a
decrypt cipher = case cipher of
Cipher{} -> Gpg.feedRead [Param "--decrypt"] $ cipherPassphrase cipher
MacOnlyCipher{} -> Gpg.pipeLazy [Param "--decrypt"]
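
Generalizing Reader from IO to any MonadIO/MonadMask monad is what lets the new chunking code run decryption directly inside Annex. A minimal sketch of the pattern (the function name and context are assumptions):

    -- Illustrative only: decrypt stored bytes straight to a file
    -- from within the Annex monad.
    decryptToFile :: Cipher -> FilePath -> L.ByteString -> Annex ()
    decryptToFile cipher dest b =
        decrypt cipher (feedBytes b) $ readBytes $
            liftIO . L.writeFile dest
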
diff --git a/Locations.hs b/Locations.hs
index d397a97be..0369c7a1c 100644
--- a/Locations.hs
+++ b/Locations.hs
@@ -421,6 +421,7 @@ keyPaths key = map (keyPath key) annexHashes
- which do not allow using a directory "XX" when "xx" already exists.
- To support that, most repositories use the lower case hash for new data. -}
type Hasher = Key -> FilePath
+
annexHashes :: [Hasher]
annexHashes = [hashDirLower, hashDirMixed]
@@ -428,12 +429,12 @@ hashDirMixed :: Hasher
hashDirMixed k = addTrailingPathSeparator $ take 2 dir </> drop 2 dir
where
dir = take 4 $ display_32bits_as_dir =<< [a,b,c,d]
- ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file k
+ ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file $ nonChunkKey k
hashDirLower :: Hasher
hashDirLower k = addTrailingPathSeparator $ take 3 dir </> drop 3 dir
where
- dir = take 6 $ md5s $ md5FilePath $ key2file k
+ dir = take 6 $ md5s $ md5FilePath $ key2file $ nonChunkKey k
{- modified version of display_32bits_as_hex from Data.Hash.MD5
- Copyright (C) 2001 Ian Lynagh
diff --git a/Logs/Chunk.hs b/Logs/Chunk.hs
index 76da50947..a3e18efc1 100644
--- a/Logs/Chunk.hs
+++ b/Logs/Chunk.hs
@@ -15,7 +15,14 @@
- Licensed under the GNU GPL version 3 or higher.
-}
-module Logs.Chunk where
+module Logs.Chunk (
+ ChunkMethod(..),
+ ChunkSize,
+ ChunkCount,
+ chunksStored,
+ chunksRemoved,
+ getCurrentChunks,
+) where
import Common.Annex
import Logs
@@ -26,19 +33,19 @@ import Logs.Chunk.Pure
import qualified Data.Map as M
import Data.Time.Clock.POSIX
-chunksStored :: UUID -> Key -> ChunkSize -> ChunkCount -> Annex ()
-chunksStored u k chunksize chunkcount = do
+chunksStored :: UUID -> Key -> ChunkMethod -> ChunkCount -> Annex ()
+chunksStored u k chunkmethod chunkcount = do
ts <- liftIO getPOSIXTime
Annex.Branch.change (chunkLogFile k) $
- showLog . changeMapLog ts (u, chunksize) chunkcount . parseLog
+ showLog . changeMapLog ts (u, chunkmethod) chunkcount . parseLog
-chunksRemoved :: UUID -> Key -> ChunkSize -> Annex ()
-chunksRemoved u k chunksize = chunksStored u k chunksize 0
+chunksRemoved :: UUID -> Key -> ChunkMethod -> Annex ()
+chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0
-getCurrentChunks :: UUID -> Key -> Annex [(ChunkSize, ChunkCount)]
+getCurrentChunks :: UUID -> Key -> Annex [(ChunkMethod, ChunkCount)]
getCurrentChunks u k = select . parseLog <$> Annex.Branch.get (chunkLogFile k)
where
- select = filter (\(_sz, ct) -> ct > 0)
- . map (\((_ku, sz), l) -> (sz, value l))
+ select = filter (\(_m, ct) -> ct > 0)
+ . map (\((_ku, m), l) -> (m, value l))
. M.toList
- . M.filterWithKey (\(ku, _sz) _ -> ku == u)
+ . M.filterWithKey (\(ku, _m) _ -> ku == u)
diff --git a/Logs/Chunk/Pure.hs b/Logs/Chunk/Pure.hs
index 9bbfb868c..26fdd63c2 100644
--- a/Logs/Chunk/Pure.hs
+++ b/Logs/Chunk/Pure.hs
@@ -6,7 +6,8 @@
-}
module Logs.Chunk.Pure
- ( ChunkSize
+ ( ChunkMethod(..)
+ , ChunkSize
, ChunkCount
, ChunkLog
, parseLog
@@ -17,24 +18,37 @@ import Common.Annex
import Logs.MapLog
import Data.Int
+-- Currently chunks are all fixed size, but other chunking methods
+-- may be added.
+data ChunkMethod = FixedSizeChunks ChunkSize | UnknownChunks String
+ deriving (Ord, Eq, Show)
+
type ChunkSize = Int64
+-- 0 when chunks are no longer present
type ChunkCount = Integer
-type ChunkLog = MapLog (UUID, ChunkSize) ChunkCount
+type ChunkLog = MapLog (UUID, ChunkMethod) ChunkCount
+
+parseChunkMethod :: String -> ChunkMethod
+parseChunkMethod s = maybe (UnknownChunks s) FixedSizeChunks (readish s)
+
+showChunkMethod :: ChunkMethod -> String
+showChunkMethod (FixedSizeChunks sz) = show sz
+showChunkMethod (UnknownChunks s) = s
parseLog :: String -> ChunkLog
parseLog = parseMapLog fieldparser valueparser
where
fieldparser s =
- let (u,sz) = separate (== sep) s
- in (,) <$> pure (toUUID u) <*> readish sz
+ let (u,m) = separate (== sep) s
+ in Just (toUUID u, parseChunkMethod m)
valueparser = readish
showLog :: ChunkLog -> String
showLog = showMapLog fieldshower valueshower
where
- fieldshower (u, sz) = fromUUID u ++ sep : show sz
+ fieldshower (u, m) = fromUUID u ++ sep : showChunkMethod m
valueshower = show
sep :: Char
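
Round-tripping the chunk method is thus trivial for fixed-size chunks, and unknown (future) methods pass through unparsed; the values below are illustrative:

    -- parseChunkMethod "1048576"  == FixedSizeChunks 1048576
    -- showChunkMethod (FixedSizeChunks 1048576) == "1048576"
    -- parseChunkMethod "rolling"  == UnknownChunks "rolling"
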
diff --git a/Remote/Directory.hs b/Remote/Directory.hs
index 62c01e370..37942a295 100644
--- a/Remote/Directory.hs
+++ b/Remote/Directory.hs
@@ -1,16 +1,16 @@
{- A "remote" that is just a filesystem directory.
-
- - Copyright 2011-2012 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
+{-# LANGUAGE Rank2Types #-}
module Remote.Directory (remote) where
import qualified Data.ByteString.Lazy as L
-import qualified Data.ByteString as S
import qualified Data.Map as M
import Common.Annex
@@ -21,10 +21,8 @@ import Config.Cost
import Config
import Utility.FileMode
import Remote.Helper.Special
-import Remote.Helper.Encryptable
-import Remote.Helper.Chunked
-import qualified Remote.Helper.Chunked.Legacy as Legacy
-import Crypto
+import Remote.Helper.ChunkedEncryptable
+import qualified Remote.Directory.LegacyChunked as Legacy
import Annex.Content
import Annex.UUID
import Utility.Metered
@@ -41,15 +39,15 @@ gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remot
gen r u c gc = do
cst <- remoteCost gc cheapRemoteCost
let chunkconfig = chunkConfig c
- return $ Just $ encryptableRemote c
- (storeEncrypted dir (getGpgEncParams (c,gc)) chunkconfig)
- (retrieveEncrypted dir chunkconfig)
+ return $ Just $ chunkedEncryptableRemote c
+ (prepareStore dir chunkconfig)
+ (retrieve dir chunkconfig)
Remote {
uuid = u,
cost = cst,
name = Git.repoDescribe r,
- storeKey = store dir chunkconfig,
- retrieveKeyFile = retrieve dir chunkconfig,
+ storeKey = storeKeyDummy,
+ retrieveKeyFile = retreiveKeyFileDummy,
retrieveKeyFileCheap = retrieveCheap dir chunkconfig,
removeKey = remove dir,
hasKey = checkPresent dir chunkconfig,
@@ -84,125 +82,49 @@ directorySetup mu _ c = do
gitConfigSpecialRemote u c' "directory" absdir
return (M.delete "directory" c', u)
-{- Locations to try to access a given Key in the Directory.
- - We try more than since we used to write to different hash directories. -}
+{- Locations to try to access a given Key in the directory.
+ - We try more than one since we used to write to different hash
+ - directories. -}
locations :: FilePath -> Key -> [FilePath]
locations d k = map (d </>) (keyPaths k)
+{- Returns the location of a Key in the directory. If the key is
+ - present, returns the location that is actually used, otherwise
+ - returns the first, default location. -}
+getLocation :: FilePath -> Key -> IO FilePath
+getLocation d k = do
+ let locs = locations d k
+ fromMaybe (Prelude.head locs) <$> firstM doesFileExist locs
+
{- Directory where the file(s) for a key are stored. -}
storeDir :: FilePath -> Key -> FilePath
storeDir d k = addTrailingPathSeparator $ d </> hashDirLower k </> keyFile k
-{- Where we store temporary data for a key as it's being uploaded. -}
+{- Where we store temporary data for a key, in the directory, as it's being
+ - written. -}
tmpDir :: FilePath -> Key -> FilePath
tmpDir d k = addTrailingPathSeparator $ d </> "tmp" </> keyFile k
-withCheckedFiles :: (FilePath -> IO Bool) -> ChunkConfig -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
-withCheckedFiles _ _ [] _ _ = return False
-withCheckedFiles check (LegacyChunks _) d k a = go $ locations d k
- where
- go [] = return False
- go (f:fs) = do
- let chunkcount = f ++ Legacy.chunkCount
- ifM (check chunkcount)
- ( do
- chunks <- Legacy.listChunks f <$> readFile chunkcount
- ifM (allM check chunks)
- ( a chunks , return False )
- , do
- chunks <- Legacy.probeChunks f check
- if null chunks
- then go fs
- else a chunks
- )
-withCheckedFiles check _ d k a = go $ locations d k
- where
- go [] = return False
- go (f:fs) = ifM (check f) ( a [f] , go fs )
-
-withStoredFiles :: ChunkConfig -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
-withStoredFiles = withCheckedFiles doesFileExist
-
-store :: FilePath -> ChunkConfig -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
-store d chunkconfig k _f p = sendAnnex k (void $ remove d k) $ \src ->
- metered (Just p) k $ \meterupdate ->
- storeHelper d chunkconfig k k $ \dests ->
- case chunkconfig of
- LegacyChunks chunksize ->
- storeLegacyChunked meterupdate chunksize dests
- =<< L.readFile src
- _ -> do
- let dest = Prelude.head dests
- meteredWriteFile meterupdate dest
- =<< L.readFile src
- return [dest]
-
-storeEncrypted :: FilePath -> [CommandParam] -> ChunkConfig -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
-storeEncrypted d gpgOpts chunkconfig (cipher, enck) k p = sendAnnex k (void $ remove d enck) $ \src ->
- metered (Just p) k $ \meterupdate ->
- storeHelper d chunkconfig enck k $ \dests ->
- encrypt gpgOpts cipher (feedFile src) $ readBytes $ \b ->
- case chunkconfig of
- LegacyChunks chunksize ->
- storeLegacyChunked meterupdate chunksize dests b
- _ -> do
- let dest = Prelude.head dests
- meteredWriteFile meterupdate dest b
- return [dest]
-
-{- Splits a ByteString into chunks and writes to dests, obeying configured
- - chunk size (not to be confused with the L.ByteString chunk size).
- - Note: Must always write at least one file, even for empty ByteString. -}
-storeLegacyChunked :: MeterUpdate -> ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath]
-storeLegacyChunked _ _ [] _ = error "bad storeLegacyChunked call"
-storeLegacyChunked meterupdate chunksize alldests@(firstdest:_) b
- | L.null b = do
- -- must always write at least one file, even for empty
- L.writeFile firstdest b
- return [firstdest]
- | otherwise = storeLegacyChunked' meterupdate chunksize alldests (L.toChunks b) []
-storeLegacyChunked' :: MeterUpdate -> ChunkSize -> [FilePath] -> [S.ByteString] -> [FilePath] -> IO [FilePath]
-storeLegacyChunked' _ _ [] _ _ = error "ran out of dests"
-storeLegacyChunked' _ _ _ [] c = return $ reverse c
-storeLegacyChunked' meterupdate chunksize (d:dests) bs c = do
- bs' <- withFile d WriteMode $
- feed zeroBytesProcessed chunksize bs
- storeLegacyChunked' meterupdate chunksize dests bs' (d:c)
- where
- feed _ _ [] _ = return []
- feed bytes sz (l:ls) h = do
- let len = S.length l
- let s = fromIntegral len
- if s <= sz || sz == chunksize
- then do
- S.hPut h l
- let bytes' = addBytesProcessed bytes len
- meterupdate bytes'
- feed bytes' (sz - s) ls h
- else return (l:ls)
-
-storeHelper :: FilePath -> ChunkConfig -> Key -> Key -> ([FilePath] -> IO [FilePath]) -> Annex Bool
-storeHelper d chunkconfig key origkey storer = check <&&> liftIO go
- where
- tmpdir = tmpDir d key
- destdir = storeDir d key
-
- {- An encrypted key does not have a known size,
- - so check that the size of the original key is available as free
- - space. -}
- check = do
- liftIO $ createDirectoryIfMissing True tmpdir
- checkDiskSpace (Just tmpdir) origkey 0
-
- go = case chunkconfig of
- NoChunks -> flip catchNonAsync (\e -> print e >> return False) $ do
- let tmpf = tmpdir </> keyFile key
- void $ storer [tmpf]
+{- Check if there is enough free disk space in the remote's directory to
+ - store the key. Note that the unencrypted key size is checked. -}
+prepareStore :: FilePath -> ChunkConfig -> Preparer Storer
+prepareStore d chunkconfig = checkPrepare
+ (\k -> checkDiskSpace (Just d) k 0)
+ (byteStorer $ store d chunkconfig)
+
+store :: FilePath -> ChunkConfig -> Key -> L.ByteString -> MeterUpdate -> Annex Bool
+store d chunkconfig k b p = liftIO $ do
+ void $ tryIO $ createDirectoryIfMissing True tmpdir
+ case chunkconfig of
+ LegacyChunks chunksize -> Legacy.store chunksize finalizer k b p tmpdir destdir
+ _ -> do
+ let tmpf = tmpdir </> keyFile k
+ meteredWriteFile p tmpf b
finalizer tmpdir destdir
return True
- UnpaddedChunks _ -> error "TODO: storeHelper with UnpaddedChunks"
- LegacyChunks _ -> Legacy.storeChunks key tmpdir destdir storer recorder finalizer
-
+ where
+ tmpdir = tmpDir d k
+ destdir = storeDir d k
finalizer tmp dest = do
void $ tryIO $ allowWrite dest -- may already exist
void $ tryIO $ removeDirectoryRecursive dest -- or not exist
@@ -212,38 +134,21 @@ storeHelper d chunkconfig key origkey storer = check <&&> liftIO go
void $ tryIO $ do
mapM_ preventWrite =<< dirContents dest
preventWrite dest
-
- recorder f s = do
- void $ tryIO $ allowWrite f
- writeFile f s
- void $ tryIO $ preventWrite f
-
-retrieve :: FilePath -> ChunkConfig -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
-retrieve d chunkconfig k _ f p = metered (Just p) k $ \meterupdate ->
- liftIO $ withStoredFiles chunkconfig d k $ \files ->
- catchBoolIO $ do
- meteredWriteFileChunks meterupdate f files L.readFile
- return True
-retrieveEncrypted :: FilePath -> ChunkConfig -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool
-retrieveEncrypted d chunkconfig (cipher, enck) k f p = metered (Just p) k $ \meterupdate ->
- liftIO $ withStoredFiles chunkconfig d enck $ \files ->
- catchBoolIO $ do
- decrypt cipher (feeder files) $
- readBytes $ meteredWriteFile meterupdate f
- return True
- where
- feeder files h = forM_ files $ L.hPut h <=< L.readFile
+retrieve :: FilePath -> ChunkConfig -> Preparer Retriever
+retrieve d (LegacyChunks _) = Legacy.retrieve locations d
+retrieve d _ = simplyPrepare $ byteRetriever $ \k ->
+ liftIO $ L.readFile =<< getLocation d k
retrieveCheap :: FilePath -> ChunkConfig -> Key -> FilePath -> Annex Bool
--- no cheap retrieval for chunks
+-- no cheap retrieval possible for chunks
retrieveCheap _ (UnpaddedChunks _) _ _ = return False
retrieveCheap _ (LegacyChunks _) _ _ = return False
#ifndef mingw32_HOST_OS
-retrieveCheap d ck k f = liftIO $ withStoredFiles ck d k go
- where
- go [file] = catchBoolIO $ createSymbolicLink file f >> return True
- go _files = return False
+retrieveCheap d NoChunks k f = liftIO $ catchBoolIO $ do
+ file <- getLocation d k
+ createSymbolicLink file f
+ return True
#else
retrieveCheap _ _ _ _ = return False
#endif
@@ -256,12 +161,25 @@ remove d k = liftIO $ do
- before it can delete them. -}
void $ tryIO $ mapM_ allowWrite =<< dirContents dir
#endif
- catchBoolIO $ do
+ ok <- catchBoolIO $ do
removeDirectoryRecursive dir
return True
+ {- Removing the subdirectory will fail if it doesn't exist.
+ - But, we want to succeed in that case, as long as the directory
+ - remote's top-level directory does exist. -}
+ if ok
+ then return ok
+ else doesDirectoryExist d <&&> (not <$> doesDirectoryExist dir)
where
dir = storeDir d k
checkPresent :: FilePath -> ChunkConfig -> Key -> Annex (Either String Bool)
-checkPresent d chunkconfig k = liftIO $ catchMsgIO $ withStoredFiles chunkconfig d k $
- const $ return True -- withStoredFiles checked that it exists
+checkPresent d (LegacyChunks _) k = Legacy.checkPresent d locations k
+checkPresent d _ k = liftIO $ do
+ v <- catchMsgIO $ anyM doesFileExist (locations d k)
+ case v of
+ Right False -> ifM (doesDirectoryExist d)
+ ( return v
+ , return $ Left $ "directory " ++ d ++ " is not accessible"
+ )
+ _ -> return v
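
With this in place, a directory special remote gets new-style chunking via the chunk setting, while the old chunksize setting keeps selecting the legacy format. An illustrative setup (path and size are assumptions):

    git annex initremote mydir type=directory directory=/mnt/backup encryption=none chunk=1MiB
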
diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs
new file mode 100644
index 000000000..312119f4e
--- /dev/null
+++ b/Remote/Directory/LegacyChunked.hs
@@ -0,0 +1,112 @@
+{- Legacy chunksize support for directory special remote.
+ -
+ - Can be removed eventually.
+ -
+ - Copyright 2011-2012 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE Rank2Types #-}
+
+module Remote.Directory.LegacyChunked where
+
+import qualified Data.ByteString.Lazy as L
+import qualified Data.ByteString as S
+
+import Common.Annex
+import Utility.FileMode
+import Remote.Helper.ChunkedEncryptable
+import qualified Remote.Helper.Chunked.Legacy as Legacy
+import Annex.Perms
+import Utility.Metered
+
+withCheckedFiles :: (FilePath -> IO Bool) -> FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> ([FilePath] -> IO Bool) -> IO Bool
+withCheckedFiles _ [] _locations _ _ = return False
+withCheckedFiles check d locations k a = go $ locations d k
+ where
+ go [] = return False
+ go (f:fs) = do
+ let chunkcount = f ++ Legacy.chunkCount
+ ifM (check chunkcount)
+ ( do
+ chunks <- Legacy.listChunks f <$> readFile chunkcount
+ ifM (allM check chunks)
+ ( a chunks , return False )
+ , do
+ chunks <- Legacy.probeChunks f check
+ if null chunks
+ then go fs
+ else a chunks
+ )
+withStoredFiles :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> ([FilePath] -> IO Bool) -> IO Bool
+withStoredFiles = withCheckedFiles doesFileExist
+
+{- Splits a ByteString into chunks and writes to dests, obeying configured
+ - chunk size (not to be confused with the L.ByteString chunk size). -}
+storeLegacyChunked :: MeterUpdate -> ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath]
+storeLegacyChunked _ _ [] _ = error "bad storeLegacyChunked call"
+storeLegacyChunked meterupdate chunksize alldests@(firstdest:_) b
+ | L.null b = do
+ -- always write at least one file, even for empty
+ L.writeFile firstdest b
+ return [firstdest]
+ | otherwise = storeLegacyChunked' meterupdate chunksize alldests (L.toChunks b) []
+storeLegacyChunked' :: MeterUpdate -> ChunkSize -> [FilePath] -> [S.ByteString] -> [FilePath] -> IO [FilePath]
+storeLegacyChunked' _ _ [] _ _ = error "ran out of dests"
+storeLegacyChunked' _ _ _ [] c = return $ reverse c
+storeLegacyChunked' meterupdate chunksize (d:dests) bs c = do
+ bs' <- withFile d WriteMode $
+ feed zeroBytesProcessed chunksize bs
+ storeLegacyChunked' meterupdate chunksize dests bs' (d:c)
+ where
+ feed _ _ [] _ = return []
+ feed bytes sz (l:ls) h = do
+ let len = S.length l
+ let s = fromIntegral len
+ if s <= sz || sz == chunksize
+ then do
+ S.hPut h l
+ let bytes' = addBytesProcessed bytes len
+ meterupdate bytes'
+ feed bytes' (sz - s) ls h
+ else return (l:ls)
+
+storeHelper :: (FilePath -> FilePath -> IO ()) -> Key -> ([FilePath] -> IO [FilePath]) -> FilePath -> FilePath -> IO Bool
+storeHelper finalizer key storer tmpdir destdir = do
+ void $ liftIO $ tryIO $ createDirectoryIfMissing True tmpdir
+ Legacy.storeChunks key tmpdir destdir storer recorder finalizer
+ where
+ recorder f s = do
+ void $ tryIO $ allowWrite f
+ writeFile f s
+ void $ tryIO $ preventWrite f
+
+store :: ChunkSize -> (FilePath -> FilePath -> IO ()) -> Key -> L.ByteString -> MeterUpdate -> FilePath -> FilePath -> IO Bool
+store chunksize finalizer k b p = storeHelper finalizer k $ \dests ->
+ storeLegacyChunked p chunksize dests b
+
+{- Need to get a single ByteString containing every chunk.
+ - Done very inefficiently, by writing to a temp file.
+ - :/ This is legacy code..
+ -}
+retrieve :: (FilePath -> Key -> [FilePath]) -> FilePath -> Preparer Retriever
+retrieve locations d basek a = do
+ showLongNote "This remote uses the deprecated chunksize setting. So this will be quite slow."
+ tmpdir <- fromRepo $ gitAnnexTmpMiscDir
+ createAnnexDirectory tmpdir
+ let tmp = tmpdir </> keyFile basek ++ ".directorylegacy.tmp"
+ a $ Just $ byteRetriever $ \k -> liftIO $ do
+ void $ withStoredFiles d locations k $ \fs -> do
+ forM_ fs $
+ S.appendFile tmp <=< S.readFile
+ return True
+ b <- L.readFile tmp
+ nukeFile tmp
+ return b
+
+checkPresent :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> Annex (Either String Bool)
+checkPresent d locations k = liftIO $ catchMsgIO $
+ withStoredFiles d locations k $
+ -- withStoredFiles checked that it exists
+ const $ return True
diff --git a/Remote/External.hs b/Remote/External.hs
index 464e9b57e..1c22a589b 100644
--- a/Remote/External.hs
+++ b/Remote/External.hs
@@ -15,14 +15,12 @@ import Types.CleanupActions
import qualified Git
import Config
import Remote.Helper.Special
-import Remote.Helper.Encryptable
-import Crypto
+import Remote.Helper.ChunkedEncryptable
import Utility.Metered
import Logs.Transfer
import Logs.PreferredContent.Raw
import Logs.RemoteState
import Config.Cost
-import Annex.Content
import Annex.UUID
import Annex.Exception
import Creds
@@ -30,7 +28,6 @@ import Creds
import Control.Concurrent.STM
import System.Log.Logger (debugM)
import qualified Data.Map as M
-import qualified Data.ByteString.Lazy as L
remote :: RemoteType
remote = RemoteType {
@@ -46,15 +43,15 @@ gen r u c gc = do
Annex.addCleanup (RemoteCleanup u) $ stopExternal external
cst <- getCost external r gc
avail <- getAvailability external r gc
- return $ Just $ encryptableRemote c
- (storeEncrypted external $ getGpgEncParams (c,gc))
- (retrieveEncrypted external)
+ return $ Just $ chunkedEncryptableRemote c
+ (simplyPrepare $ store external)
+ (simplyPrepare $ retrieve external)
Remote {
uuid = u,
cost = cst,
name = Git.repoDescribe r,
- storeKey = store external,
- retrieveKeyFile = retrieve external,
+ storeKey = storeKeyDummy,
+ retrieveKeyFile = retreiveKeyFileDummy,
retrieveKeyFileCheap = \_ _ -> return False,
removeKey = remove external,
hasKey = checkPresent external,
@@ -90,25 +87,8 @@ externalSetup mu _ c = do
gitConfigSpecialRemote u c'' "externaltype" externaltype
return (c'', u)
-store :: External -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
-store external k _f p = sendAnnex k rollback $ \f ->
- metered (Just p) k $
- storeHelper external k f
- where
- rollback = void $ remove external k
-
-storeEncrypted :: External -> [CommandParam] -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
-storeEncrypted external gpgOpts (cipher, enck) k p = withTmp enck $ \tmp ->
- sendAnnex k rollback $ \src -> do
- metered (Just p) k $ \meterupdate -> do
- liftIO $ encrypt gpgOpts cipher (feedFile src) $
- readBytes $ L.writeFile tmp
- storeHelper external enck tmp meterupdate
- where
- rollback = void $ remove external enck
-
-storeHelper :: External -> Key -> FilePath -> MeterUpdate -> Annex Bool
-storeHelper external k f p = safely $
+store :: External -> Storer
+store external = fileStorer $ \k f p ->
handleRequest external (TRANSFER Upload k f) (Just p) $ \resp ->
case resp of
TRANSFER_SUCCESS Upload k' | k == k' ->
@@ -119,31 +99,15 @@ storeHelper external k f p = safely $
return False
_ -> Nothing
-retrieve :: External -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
-retrieve external k _f d p = metered (Just p) k $
- retrieveHelper external k d
-
-retrieveEncrypted :: External -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool
-retrieveEncrypted external (cipher, enck) k f p = withTmp enck $ \tmp ->
- metered (Just p) k $ \meterupdate ->
- ifM (retrieveHelper external enck tmp meterupdate)
- ( liftIO $ catchBoolIO $ do
- decrypt cipher (feedFile tmp) $
- readBytes $ L.writeFile f
- return True
- , return False
- )
-
-retrieveHelper :: External -> Key -> FilePath -> MeterUpdate -> Annex Bool
-retrieveHelper external k d p = safely $
+retrieve :: External -> Retriever
+retrieve external = fileRetriever $ \d k p ->
handleRequest external (TRANSFER Download k d) (Just p) $ \resp ->
case resp of
TRANSFER_SUCCESS Download k'
- | k == k' -> Just $ return True
+ | k == k' -> Just $ return ()
TRANSFER_FAILURE Download k' errmsg
| k == k' -> Just $ do
- warning errmsg
- return False
+ error errmsg
_ -> Nothing
remove :: External -> Key -> Annex Bool
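
Note the changed failure convention for retrieval: instead of returning False, a Retriever now throws (here via error errmsg), and the chunking layer catches that with catchNonAsyncAnnex. A hedged sketch of a minimal Retriever under this convention (fetchUrlTo is an assumed helper, not a real API):

    -- Illustrative: throw on failure, return () on success.
    retrieveHttp :: String -> Retriever
    retrieveHttp baseurl = fileRetriever $ \f k _p ->
        unlessM (liftIO $ fetchUrlTo (baseurl ++ key2file k) f) $
            error "download failed"
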
diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs
index 00be9e1a9..bf8f05061 100644
--- a/Remote/Glacier.hs
+++ b/Remote/Glacier.hs
@@ -282,7 +282,8 @@ jobList r keys = go =<< glacierEnv (config r) (uuid r)
then return nada
else do
enckeys <- forM keys $ \k ->
- maybe k snd <$> cipherKey (config r) k
+ maybe k (\(_, enck) -> enck k)
+ <$> cipherKey (config r)
let keymap = M.fromList $ zip enckeys keys
let convert = mapMaybe (`M.lookup` keymap)
return (convert succeeded, convert failed)
diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs
index 031ff63d6..0d786c98d 100644
--- a/Remote/Helper/Chunked.hs
+++ b/Remote/Helper/Chunked.hs
@@ -1,22 +1,30 @@
{- git-annex chunked remotes
-
- - Copyright 2012-2014 Joey Hess <joey@kitenet.net>
+ - Copyright 2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
-module Remote.Helper.Chunked
- ( ChunkSize
- , ChunkConfig(..)
- , chunkConfig
- , meteredWriteFileChunks
- ) where
+module Remote.Helper.Chunked (
+ ChunkSize,
+ ChunkConfig(..),
+ chunkConfig,
+ storeChunks,
+ removeChunks,
+ retrieveChunks,
+ hasKeyChunks,
+) where
import Common.Annex
import Utility.DataUnits
+import Types.StoreRetrieve
import Types.Remote
-import Logs.Chunk.Pure (ChunkSize)
+import Types.Key
+import Logs.Chunk
import Utility.Metered
+import Crypto (EncKey)
+import Backend (isStableKey)
+import Annex.Exception
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
@@ -25,23 +33,366 @@ data ChunkConfig
= NoChunks
| UnpaddedChunks ChunkSize
| LegacyChunks ChunkSize
+ deriving (Show)
+
+noChunks :: ChunkConfig -> Bool
+noChunks NoChunks = True
+noChunks _ = False
chunkConfig :: RemoteConfig -> ChunkConfig
chunkConfig m =
case M.lookup "chunksize" m of
Nothing -> case M.lookup "chunk" m of
Nothing -> NoChunks
- Just v -> UnpaddedChunks $ readsz v "chunk"
- Just v -> LegacyChunks $ readsz v "chunksize"
- where
- readsz v f = case readSize dataUnits v of
- Just size | size > 0 -> fromInteger size
- _ -> error ("bad " ++ f)
-
-{- Writes a series of chunks to a file. The feeder is called to get
- - each chunk. -}
-meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO ()
-meteredWriteFileChunks meterupdate dest chunks feeder =
- withBinaryFile dest WriteMode $ \h ->
- forM_ chunks $
- meteredWrite meterupdate h <=< feeder
+ Just v -> readsz UnpaddedChunks v "chunk"
+ Just v -> readsz LegacyChunks v "chunksize"
+ where
+ readsz c v f = case readSize dataUnits v of
+ Just size
+ | size == 0 -> NoChunks
+ | size > 0 -> c (fromInteger size)
+ _ -> error $ "bad configuration " ++ f ++ "=" ++ v
+
+-- An infinite stream of chunk keys, starting from chunk 1.
+newtype ChunkKeyStream = ChunkKeyStream [Key]
+
+chunkKeyStream :: Key -> ChunkSize -> ChunkKeyStream
+chunkKeyStream basek chunksize = ChunkKeyStream $ map mk [1..]
+ where
+ mk chunknum = sizedk { keyChunkNum = Just chunknum }
+ sizedk = basek { keyChunkSize = Just (toInteger chunksize) }
+
+nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream)
+nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l)
+nextChunkKeyStream (ChunkKeyStream []) = undefined -- stream is infinite!
+
+takeChunkKeyStream :: ChunkCount -> ChunkKeyStream -> [Key]
+takeChunkKeyStream n (ChunkKeyStream l) = genericTake n l
+
+-- Number of chunks already consumed from the stream.
+numChunks :: ChunkKeyStream -> Integer
+numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream
+
+{- Splits up the key's content into chunks, passing each chunk to
+ - the storer action, along with a corresponding chunk key and a
+ - progress meter update callback.
+ -
+ - To support resuming, the checker is used to find the first missing
+ - chunk key. Storing starts from that chunk.
+ -
+ - This buffers each chunk in memory, so can use a lot of memory
+ - with a large ChunkSize.
+ - More optimal versions of this can be written, that rely
+ - on L.toChunks to split the lazy bytestring into chunks (typically
+ - smaller than the ChunkSize), and eg, write those chunks to a Handle.
+ - But this is the best that can be done with the storer interface that
+ - writes a whole L.ByteString at a time.
+ -}
+storeChunks
+ :: UUID
+ -> ChunkConfig
+ -> Key
+ -> FilePath
+ -> MeterUpdate
+ -> (Key -> ContentSource -> MeterUpdate -> Annex Bool)
+ -> (Key -> Annex (Either String Bool))
+ -> Annex Bool
+storeChunks u chunkconfig k f p storer checker =
+ case chunkconfig of
+ (UnpaddedChunks chunksize) | isStableKey k ->
+ bracketIO open close (go chunksize)
+ _ -> showprogress $ storer k (FileContent f)
+ where
+ showprogress = metered (Just p) k
+
+ open = tryIO $ openBinaryFile f ReadMode
+
+ close (Right h) = hClose h
+ close (Left _) = noop
+
+ go _ (Left e) = do
+ warning (show e)
+ return False
+ go chunksize (Right h) = showprogress $ \meterupdate -> do
+ let chunkkeys = chunkKeyStream k chunksize
+ (chunkkeys', startpos) <- seekResume h chunkkeys checker
+ b <- liftIO $ L.hGetContents h
+ gochunks meterupdate startpos chunksize b chunkkeys'
+
+ gochunks :: MeterUpdate -> BytesProcessed -> ChunkSize -> L.ByteString -> ChunkKeyStream -> Annex Bool
+ gochunks meterupdate startpos chunksize = loop startpos . splitchunk
+ where
+ splitchunk = L.splitAt chunksize
+
+ loop bytesprocessed (chunk, bs) chunkkeys
+ | L.null chunk && numchunks > 0 = do
+ -- Once all chunks are successfully
+ -- stored, update the chunk log.
+ chunksStored u k (FixedSizeChunks chunksize) numchunks
+ return True
+ | otherwise = do
+ liftIO $ meterupdate' zeroBytesProcessed
+ let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys
+ ifM (storer chunkkey (ByteContent chunk) meterupdate')
+ ( do
+ let bytesprocessed' = addBytesProcessed bytesprocessed (L.length chunk)
+ loop bytesprocessed' (splitchunk bs) chunkkeys'
+ , return False
+ )
+ where
+ numchunks = numChunks chunkkeys
+ {- The MeterUpdate that is passed to the action
+ - storing a chunk is offset, so that it reflects
+ - the total bytes that have already been stored
+ - in previous chunks. -}
+ meterupdate' = offsetMeterUpdate meterupdate bytesprocessed
+
+{- Check if any of the chunk keys are present. If found, seek forward
+ - in the Handle, so it will be read starting at the first missing chunk.
+ - Returns the ChunkKeyStream truncated to start at the first missing
+ - chunk, and the number of bytes skipped due to resuming.
+ -
+ - As an optimisation, if the file fits into a single chunk, there's no need
+ - to check if that chunk is present -- we know it's not, because otherwise
+ - the whole file would be present and there would be no reason to try to
+ - store it.
+ -}
+seekResume
+ :: Handle
+ -> ChunkKeyStream
+ -> (Key -> Annex (Either String Bool))
+ -> Annex (ChunkKeyStream, BytesProcessed)
+seekResume h chunkkeys checker = do
+ sz <- liftIO (hFileSize h)
+ if sz <= fromMaybe 0 (keyChunkSize $ fst $ nextChunkKeyStream chunkkeys)
+ then return (chunkkeys, zeroBytesProcessed)
+ else check 0 chunkkeys sz
+ where
+ check pos cks sz
+ | pos >= sz = do
+ -- All chunks are already stored!
+ liftIO $ hSeek h AbsoluteSeek sz
+ return (cks, toBytesProcessed sz)
+ | otherwise = do
+ v <- checker k
+ case v of
+ Right True ->
+ check pos' cks' sz
+ _ -> do
+ when (pos > 0) $
+ liftIO $ hSeek h AbsoluteSeek pos
+ return (cks, toBytesProcessed pos)
+ where
+ (k, cks') = nextChunkKeyStream cks
+ pos' = pos + fromMaybe 0 (keyChunkSize k)
+
+{- Removes all chunks of a key from a remote, by calling a remover
+ - action on each.
+ -
+ - The remover action should succeed even if asked to
+ - remove a key that is not present on the remote.
+ -
+ - This action may be called on a chunked key. It will simply remove it.
+ -}
+removeChunks :: (Key -> Annex Bool) -> UUID -> ChunkConfig -> EncKey -> Key -> Annex Bool
+removeChunks remover u chunkconfig encryptor k = do
+ ls <- chunkKeys u chunkconfig k
+ ok <- allM (remover . encryptor) (concat ls)
+ when ok $ do
+ let chunksizes = catMaybes $ map (keyChunkSize <=< headMaybe) ls
+ forM_ chunksizes $ chunksRemoved u k . FixedSizeChunks . fromIntegral
+ return ok
+
+{- Retrieves a key from a remote, using a retriever action.
+ -
+ - When the remote is chunked, tries each of the options returned by
+ - chunkKeys until it finds one where the retriever successfully
+ - gets the first chunked key. The content of that key, and any
+ - other chunks in the list is fed to the sink.
+ -
+ - If retrieval of one of the subsequent chunks throws an exception,
+ - gives up and returns False. Note that partial data may have been
+ - written to the sink in this case.
+ -
+ - Resuming is supported when using chunks. When the destination file
+ - already exists, it skips to the next chunked key that would be needed
+ - to resume.
+ -}
+retrieveChunks
+ :: Retriever
+ -> UUID
+ -> ChunkConfig
+ -> EncKey
+ -> Key
+ -> FilePath
+ -> MeterUpdate
+ -> (Maybe Handle -> Maybe MeterUpdate -> ContentSource -> Annex Bool)
+ -> Annex Bool
+retrieveChunks retriever u chunkconfig encryptor basek dest basep sink
+ | noChunks chunkconfig =
+ -- Optimisation: Try the unchunked key first, to avoid
+ -- looking in the git-annex branch for chunk counts
+ -- that are likely not there.
+ getunchunked `catchNonAsyncAnnex`
+ const (go =<< chunkKeysOnly u basek)
+ | otherwise = go =<< chunkKeys u chunkconfig basek
+ where
+ go ls = do
+ currsize <- liftIO $ catchMaybeIO $
+ toInteger . fileSize <$> getFileStatus dest
+ let ls' = maybe ls (setupResume ls) currsize
+ if any null ls'
+ then return True -- dest is already complete
+ else firstavail currsize ls' `catchNonAsyncAnnex` giveup
+
+ giveup e = do
+ warning (show e)
+ return False
+
+ firstavail _ [] = return False
+ firstavail currsize ([]:ls) = firstavail currsize ls
+ firstavail currsize ((k:ks):ls)
+ | k == basek = getunchunked
+ `catchNonAsyncAnnex` (const $ firstavail currsize ls)
+ | otherwise = do
+ let offset = resumeOffset currsize k
+ let p = maybe basep
+ (offsetMeterUpdate basep . toBytesProcessed)
+ offset
+ v <- tryNonAsyncAnnex $
+ retriever (encryptor k) p $ \content ->
+ bracketIO (maybe opennew openresume offset) hClose $ \h -> do
+ void $ tosink (Just h) p content
+ let sz = toBytesProcessed $
+ fromMaybe 0 $ keyChunkSize k
+ getrest p h sz sz ks
+ `catchNonAsyncAnnex` giveup
+ case v of
+ Left e
+ | null ls -> giveup e
+ | otherwise -> firstavail currsize ls
+ Right r -> return r
+
+ getrest _ _ _ _ [] = return True
+ getrest p h sz bytesprocessed (k:ks) = do
+ let p' = offsetMeterUpdate p bytesprocessed
+ liftIO $ p' zeroBytesProcessed
+ ifM (retriever (encryptor k) p' $ tosink (Just h) p')
+ ( getrest p h sz (addBytesProcessed bytesprocessed sz) ks
+ , giveup "chunk retrieval failed"
+ )
+
+ getunchunked = retriever (encryptor basek) basep $ tosink Nothing basep
+
+ opennew = openBinaryFile dest WriteMode
+
+ -- Open the file and seek to the start point in order to resume.
+ openresume startpoint = do
+ -- ReadWriteMode allows seeking; AppendMode does not.
+ h <- openBinaryFile dest ReadWriteMode
+ hSeek h AbsoluteSeek startpoint
+ return h
+
+ {- Progress meter updating is a bit tricky: If the Retriever
+ - populates a file, it is responsible for updating progress
+ - as the file is being retrieved.
+ -
+ - However, if the Retriever generates a lazy ByteString,
+ - it is not responsible for updating progress (often it cannot).
+ - Instead, the sink is passed a meter to update as it consumes
+ - the ByteString.
+ -}
+ tosink h p content = sink h p' content
+ where
+ p'
+ | isByteContent content = Just p
+ | otherwise = Nothing
+
+{- Can resume when the chunk's offset is at or before the end of
+ - the dest file. -}
+resumeOffset :: Maybe Integer -> Key -> Maybe Integer
+resumeOffset Nothing _ = Nothing
+resumeOffset currsize k
+ | offset <= currsize = offset
+ | otherwise = Nothing
+ where
+ offset = chunkKeyOffset k
+
+{- Drops chunks that are already present in a file, based on its size.
+ - Keeps any non-chunk keys.
+ -}
+setupResume :: [[Key]] -> Integer -> [[Key]]
+setupResume ls currsize = map dropunneeded ls
+ where
+ dropunneeded [] = []
+ dropunneeded l@(k:_) = case keyChunkSize k of
+ Just chunksize | chunksize > 0 ->
+ genericDrop (currsize `div` chunksize) l
+ _ -> l
+
+{- Checks if a key is present in a remote. This requires any one
+ - of the lists of options returned by chunkKeys to all check out
+ - as being present using the checker action.
+ -}
+hasKeyChunks
+ :: (Key -> Annex (Either String Bool))
+ -> UUID
+ -> ChunkConfig
+ -> EncKey
+ -> Key
+ -> Annex (Either String Bool)
+hasKeyChunks checker u chunkconfig encryptor basek
+ | noChunks chunkconfig =
+ -- Optimisation: Try the unchunked key first, to avoid
+ -- looking in the git-annex branch for chunk counts
+ -- that are likely not there.
+ ifM ((Right True ==) <$> checker (encryptor basek))
+ ( return (Right True)
+ , checklists Nothing =<< chunkKeysOnly u basek
+ )
+ | otherwise = checklists Nothing =<< chunkKeys u chunkconfig basek
+ where
+ checklists Nothing [] = return (Right False)
+ checklists (Just deferrederror) [] = return (Left deferrederror)
+ checklists d (l:ls)
+ | not (null l) = do
+ v <- checkchunks l
+ case v of
+ Left e -> checklists (Just e) ls
+ Right True -> return (Right True)
+ Right False -> checklists Nothing ls
+ | otherwise = checklists d ls
+
+ checkchunks :: [Key] -> Annex (Either String Bool)
+ checkchunks [] = return (Right True)
+ checkchunks (k:ks) = do
+ v <- checker (encryptor k)
+ if v == Right True
+ then checkchunks ks
+ else return v
+
+{- A key can be stored in a remote unchunked, or as a list of chunked keys.
+ - This can be the case whether or not the remote is currently configured
+ - to use chunking.
+ -
+ - It's even possible for a remote to have the same key stored multiple
+ - times with different chunk sizes!
+ -
+ - This finds all possible lists of keys that might be on the remote that
+ - can be combined to get back the requested key, in order from most to
+ - least likely to exist.
+ -}
+chunkKeys :: UUID -> ChunkConfig -> Key -> Annex [[Key]]
+chunkKeys u chunkconfig k = do
+ l <- chunkKeysOnly u k
+ return $ if noChunks chunkconfig
+ then [k] : l
+ else l ++ [[k]]
+
+chunkKeysOnly :: UUID -> Key -> Annex [[Key]]
+chunkKeysOnly u k = map (toChunkList k) <$> getCurrentChunks u k
+
+toChunkList :: Key -> (ChunkMethod, ChunkCount) -> [Key]
+toChunkList k (FixedSizeChunks chunksize, chunkcount) =
+ takeChunkKeyStream chunkcount $ chunkKeyStream k chunksize
+toChunkList _ (UnknownChunks _, _) = []
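
To make the key bookkeeping concrete: chunk keys are the base key with keyChunkSize and a 1-based keyChunkNum filled in, so a 2.5 MiB file stored with 1 MiB chunks produces three chunk keys, and the chunk log records (FixedSizeChunks 1048576, 3). Illustratively:

    -- takeChunkKeyStream 3 (chunkKeyStream k (1024*1024))
    --   => [ k{ keyChunkSize = Just 1048576, keyChunkNum = Just 1 }
    --      , k{ keyChunkSize = Just 1048576, keyChunkNum = Just 2 }
    --      , k{ keyChunkSize = Just 1048576, keyChunkNum = Just 3 } ]
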
diff --git a/Remote/Helper/Chunked/Legacy.hs b/Remote/Helper/Chunked/Legacy.hs
index 1ec0eb68f..4f402705a 100644
--- a/Remote/Helper/Chunked/Legacy.hs
+++ b/Remote/Helper/Chunked/Legacy.hs
@@ -9,6 +9,7 @@ module Remote.Helper.Chunked.Legacy where
import Common.Annex
import Remote.Helper.Chunked
+import Utility.Metered
import qualified Data.ByteString.Lazy as L
import qualified Control.Exception as E
@@ -73,7 +74,7 @@ storeChunks key tmp dest storer recorder finalizer = either onerr return
finalizer tmp dest
return (not $ null stored)
onerr e = do
- print e
+ warningIO (show e)
return False
basef = tmp ++ keyFile key
@@ -104,7 +105,7 @@ storeChunked chunksize dests storer content = either onerr return
| otherwise = storechunks sz [] dests content
onerr e = do
- print e
+ warningIO (show e)
return []
storechunks _ _ [] _ = return [] -- ran out of dests
@@ -114,3 +115,12 @@ storeChunked chunksize dests storer content = either onerr return
let (chunk, b') = L.splitAt sz b
storer d chunk
storechunks sz (d:useddests) ds b'
+
+{- Writes a series of chunks to a file. The feeder is called to get
+ - each chunk.
+ -}
+meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO ()
+meteredWriteFileChunks meterupdate dest chunks feeder =
+ withBinaryFile dest WriteMode $ \h ->
+ forM_ chunks $
+ meteredWrite meterupdate h <=< feeder
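A hedged usage sketch of the new meteredWriteFileChunks (here the chunk values are local file paths, standing in for whatever the caller's feeder resolves, much as the WebDAV remote below resolves URLs):

    import qualified Data.ByteString.Lazy as L
    import Utility.Metered
    import Remote.Helper.Chunked.Legacy

    -- Reassemble a destination file from chunk files on disk,
    -- updating the meter as each chunk is written.
    reassemble :: FilePath -> [FilePath] -> IO ()
    reassemble dest chunkfiles =
        meteredWriteFileChunks nullMeterUpdate dest chunkfiles L.readFile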
diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs
new file mode 100644
index 000000000..2a844212b
--- /dev/null
+++ b/Remote/Helper/ChunkedEncryptable.hs
@@ -0,0 +1,200 @@
+{- Remotes that support both chunking and encryption.
+ -
+ - Copyright 2014 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE RankNTypes #-}
+
+module Remote.Helper.ChunkedEncryptable (
+ Preparer,
+ Storer,
+ Retriever,
+ simplyPrepare,
+ ContentSource,
+ checkPrepare,
+ fileStorer,
+ byteStorer,
+ fileRetriever,
+ byteRetriever,
+ storeKeyDummy,
+ retreiveKeyFileDummy,
+ chunkedEncryptableRemote,
+ module X
+) where
+
+import Common.Annex
+import Types.StoreRetrieve
+import Types.Remote
+import Crypto
+import Config.Cost
+import Utility.Metered
+import Remote.Helper.Chunked as X
+import Remote.Helper.Encryptable as X
+import Annex.Content
+import Annex.Exception
+
+import qualified Data.ByteString.Lazy as L
+import Control.Exception (bracket)
+
+-- Use when nothing needs to be done to prepare a helper.
+simplyPrepare :: helper -> Preparer helper
+simplyPrepare helper _ a = a $ Just helper
+
+-- Use to run a check when preparing a helper.
+checkPrepare :: (Key -> Annex Bool) -> helper -> Preparer helper
+checkPrepare checker helper k a = ifM (checker k)
+ ( a (Just helper)
+ , a Nothing
+ )
+
+-- A Storer that expects to be provided with a file containing
+-- the content of the key to store.
+fileStorer :: (Key -> FilePath -> MeterUpdate -> Annex Bool) -> Storer
+fileStorer a k (FileContent f) m = a k f m
+fileStorer a k (ByteContent b) m = withTmp k $ \f -> do
+ liftIO $ L.writeFile f b
+ a k f m
+
+-- A Storer that expects to be provided with a L.ByteString of
+-- the content to store.
+byteStorer :: (Key -> L.ByteString -> MeterUpdate -> Annex Bool) -> Storer
+byteStorer a k c m = withBytes c $ \b -> a k b m
+
+-- A Retriever that writes the content of a Key to a provided file.
+-- It is responsible for updating the progress meter as it retrieves data.
+fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
+fileRetriever a k m callback = do
+ f <- prepTmp k
+ a f k m
+ callback (FileContent f)
+
+-- A Retriever that generates a L.ByteString containing the Key's content.
+byteRetriever :: (Key -> Annex L.ByteString) -> Retriever
+byteRetriever a k _m callback = callback =<< (ByteContent <$> a k)
+
+{- The base Remote that is provided to chunkedEncryptableRemote
+ - needs to have storeKey and retreiveKeyFile methods, but they are
+ - never actually used (since chunkedEncryptableRemote replaces
+ - them). Here are some dummy ones.
+ -}
+storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool
+storeKeyDummy _ _ _ = return False
+retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
+retreiveKeyFileDummy _ _ _ _ = return False
+
+-- Modifies a base Remote to support both chunking and encryption.
+chunkedEncryptableRemote
+ :: RemoteConfig
+ -> Preparer Storer
+ -> Preparer Retriever
+ -> Remote
+ -> Remote
+chunkedEncryptableRemote c preparestorer prepareretriever baser = encr
+ where
+ encr = baser
+ { storeKey = \k _f p -> cip >>= storeKeyGen k p
+ , retrieveKeyFile = \k _f d p -> cip >>= retrieveKeyFileGen k d p
+ , retrieveKeyFileCheap = \k d -> cip >>= maybe
+ (retrieveKeyFileCheap baser k d)
+ (\_ -> return False)
+ , removeKey = \k -> cip >>= removeKeyGen k
+ , hasKey = \k -> cip >>= hasKeyGen k
+ , cost = maybe
+ (cost baser)
+ (const $ cost baser + encryptedRemoteCostAdj)
+ (extractCipher c)
+ }
+ cip = cipherKey c
+ chunkconfig = chunkConfig c
+ gpgopts = getGpgEncParams encr
+
+ safely a = catchNonAsyncAnnex a (\e -> warning (show e) >> return False)
+
+ -- chunk, then encrypt, then feed to the storer
+ storeKeyGen k p enc =
+ safely $ preparestorer k $ safely . go
+ where
+ go (Just storer) = sendAnnex k rollback $ \src ->
+ metered (Just p) k $ \p' ->
+ storeChunks (uuid baser) chunkconfig k src p'
+ (storechunk enc storer)
+ (hasKey baser)
+ go Nothing = return False
+ rollback = void $ removeKey encr k
+
+ storechunk Nothing storer k content p = storer k content p
+ storechunk (Just (cipher, enck)) storer k content p =
+ withBytes content $ \b ->
+ encrypt gpgopts cipher (feedBytes b) $
+ readBytes $ \encb ->
+ storer (enck k) (ByteContent encb) p
+
+ -- call retriever to get chunks; decrypt them; stream to dest file
+ retrieveKeyFileGen k dest p enc =
+ safely $ prepareretriever k $ safely . go
+ where
+ go (Just retriever) = metered (Just p) k $ \p' ->
+ retrieveChunks retriever (uuid baser) chunkconfig
+ enck k dest p' (sink dest enc)
+ go Nothing = return False
+ enck = maybe id snd enc
+
+ removeKeyGen k enc = removeChunks remover (uuid baser) chunkconfig enck k
+ where
+ enck = maybe id snd enc
+ remover = removeKey baser
+
+ hasKeyGen k enc = hasKeyChunks checker (uuid baser) chunkconfig enck k
+ where
+ enck = maybe id snd enc
+ checker = hasKey baser
+
+{- Sink callback for retrieveChunks. Stores the file content into the
+ - provided Handle, decrypting it first if necessary.
+ -
+ - If the remote did not store the content using chunks, no Handle
+ - will be provided, and it's up to us to open the destination file.
+ -
+ - Note that when neither chunking nor encryption is used, and the remote
+ - provides FileContent, that file only needs to be renamed
+ - into place. (And it may even already be in the right place..)
+ -}
+sink
+ :: FilePath
+ -> Maybe (Cipher, EncKey)
+ -> Maybe Handle
+ -> Maybe MeterUpdate
+ -> ContentSource
+ -> Annex Bool
+sink dest enc mh mp content = do
+ case (enc, mh, content) of
+ (Nothing, Nothing, FileContent f)
+ | f == dest -> noop
+ | otherwise -> liftIO $ moveFile f dest
+ (Just (cipher, _), _, ByteContent b) ->
+ decrypt cipher (feedBytes b) $
+ readBytes write
+ (Just (cipher, _), _, FileContent f) -> do
+ withBytes content $ \b ->
+ decrypt cipher (feedBytes b) $
+ readBytes write
+ liftIO $ nukeFile f
+ (Nothing, _, FileContent f) -> do
+ withBytes content write
+ liftIO $ nukeFile f
+ (Nothing, _, ByteContent b) -> write b
+ return True
+ where
+ write b = case mh of
+ Just h -> liftIO $ b `streamto` h
+ Nothing -> liftIO $ bracket opendest hClose (b `streamto`)
+ streamto b h = case mp of
+ Just p -> meteredWrite p h b
+ Nothing -> L.hPut h b
+ opendest = openBinaryFile dest WriteMode
+
+withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a
+withBytes (ByteContent b) a = a b
+withBytes (FileContent f) a = a =<< liftIO (L.readFile f)
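For orientation, a hedged sketch of how a remote might wire itself up with this module (the dir path and the store/retrieve bodies are hypothetical; Remote/Directory.hs is the real consumer in this commit). The base Remote supplies storeKeyDummy and retreiveKeyFileDummy, which chunkedEncryptableRemote then replaces:

    mkChunkedRemote :: RemoteConfig -> FilePath -> Remote -> Remote
    mkChunkedRemote c dir baser = chunkedEncryptableRemote c
        (simplyPrepare $ fileStorer store)
        (simplyPrepare $ fileRetriever retrieve)
        baser
      where
        -- Key -> FilePath -> MeterUpdate -> Annex Bool
        store k src p = liftIO $ do
            b <- L.readFile src
            meteredWriteFile p (dir </> key2file k) b
            return True
        -- FilePath -> Key -> MeterUpdate -> Annex ()
        -- (fileRetriever leaves meter updates to us; meteredWriteFile does them)
        retrieve dest k p = liftIO $ do
            b <- L.readFile (dir </> key2file k)
            meteredWriteFile p dest b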
diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs
index 41174cf7c..65a3ba284 100644
--- a/Remote/Helper/Encryptable.hs
+++ b/Remote/Helper/Encryptable.hs
@@ -66,44 +66,45 @@ encryptionSetup c = maybe genCipher updateCipher $ extractCipher c
c' = foldr M.delete c
-- git-annex used to remove 'encryption' as well, since
-- it was redundant; we now need to keep it for
- -- public-key incryption, hence we leave it on newer
+ -- public-key encryption, hence we leave it on newer
-- remotes (while being backward-compatible).
[ "keyid", "keyid+", "keyid-", "highRandomQuality" ]
-{- Modifies a Remote to support encryption.
- -
- - Two additional functions must be provided by the remote,
- - to support storing and retrieving encrypted content. -}
+{- Modifies a Remote to support encryption. -}
+-- TODO: deprecated
encryptableRemote
:: RemoteConfig
-> ((Cipher, Key) -> Key -> MeterUpdate -> Annex Bool)
-> ((Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool)
-> Remote
-> Remote
-encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r =
- r {
- storeKey = store,
- retrieveKeyFile = retrieve,
- retrieveKeyFileCheap = retrieveCheap,
- removeKey = withkey $ removeKey r,
- hasKey = withkey $ hasKey r,
- cost = maybe
- (cost r)
- (const $ cost r + encryptedRemoteCostAdj)
- (extractCipher c)
- }
- where
- store k f p = cip k >>= maybe
+encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = r
+ { storeKey = \k f p -> cip k >>= maybe
(storeKey r k f p)
- (\enck -> storeKeyEncrypted enck k p)
- retrieve k f d p = cip k >>= maybe
+ (\v -> storeKeyEncrypted v k p)
+ , retrieveKeyFile = \k f d p -> cip k >>= maybe
(retrieveKeyFile r k f d p)
- (\enck -> retrieveKeyFileEncrypted enck k d p)
- retrieveCheap k d = cip k >>= maybe
+ (\v -> retrieveKeyFileEncrypted v k d p)
+ , retrieveKeyFileCheap = \k d -> cip k >>= maybe
(retrieveKeyFileCheap r k d)
(\_ -> return False)
- withkey a k = cip k >>= maybe (a k) (a . snd)
- cip = cipherKey c
+ , removeKey = \k -> cip k >>= maybe
+ (removeKey r k)
+ (\(_, enckey) -> removeKey r enckey)
+ , hasKey = \k -> cip k >>= maybe
+ (hasKey r k)
+ (\(_, enckey) -> hasKey r enckey)
+ , cost = maybe
+ (cost r)
+ (const $ cost r + encryptedRemoteCostAdj)
+ (extractCipher c)
+ }
+ where
+ cip k = do
+ v <- cipherKey c
+ return $ case v of
+ Nothing -> Nothing
+ Just (cipher, enck) -> Just (cipher, enck k)
{- Gets encryption Cipher. The decrypted Ciphers are cached in the Annex
- state. -}
@@ -136,11 +137,11 @@ embedCreds c
| isJust (M.lookup "cipherkeys" c) && isJust (M.lookup "cipher" c) = True
| otherwise = False
-{- Gets encryption Cipher, and encrypted version of Key. -}
-cipherKey :: RemoteConfig -> Key -> Annex (Maybe (Cipher, Key))
-cipherKey c k = fmap make <$> remoteCipher c
+{- Gets encryption Cipher, and key encryptor. -}
+cipherKey :: RemoteConfig -> Annex (Maybe (Cipher, EncKey))
+cipherKey c = fmap make <$> remoteCipher c
where
- make ciphertext = (ciphertext, encryptKey mac ciphertext k)
+ make ciphertext = (ciphertext, encryptKey mac ciphertext)
mac = fromMaybe defaultMac $ M.lookup "mac" c >>= readMac
{- Stores an StorableCipher in a remote's configuration. -}
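The cipherKey change above is what lets encryption compose with chunking: a single cipher lookup now yields an EncKey function that can encrypt any number of chunk keys. A hedged sketch (the function name is hypothetical):

    encryptedChunkKeys :: RemoteConfig -> [Key] -> Annex (Maybe [Key])
    encryptedChunkKeys c ks = do
        v <- cipherKey c
        return $ case v of
            Nothing -> Nothing                  -- remote is not encrypted
            Just (_cipher, encryptor) -> Just (map encryptor ks)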
diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs
index 4be7e4701..d6644cdc7 100644
--- a/Remote/WebDAV.hs
+++ b/Remote/WebDAV.hs
@@ -5,7 +5,7 @@
- Licensed under the GNU GPL version 3 or higher.
-}
-{-# LANGUAGE ScopedTypeVariables, CPP #-}
+{-# LANGUAGE ScopedTypeVariables #-}
module Remote.WebDAV (remote, davCreds, configUrl) where
@@ -16,11 +16,7 @@ import qualified Data.ByteString.Lazy.UTF8 as L8
import qualified Data.ByteString.Lazy as L
import qualified Control.Exception as E
import qualified Control.Exception.Lifted as EL
-#if MIN_VERSION_DAV(0,6,0)
import Network.HTTP.Client (HttpException(..))
-#else
-import Network.HTTP.Conduit (HttpException(..))
-#endif
import Network.HTTP.Types
import System.Log.Logger (debugM)
import System.IO.Error
@@ -113,7 +109,7 @@ storeHelper :: Remote -> Key -> DavUrl -> DavUser -> DavPass -> L.ByteString ->
storeHelper r k baseurl user pass b = catchBoolIO $ do
mkdirRecursiveDAV tmpurl user pass
case chunkconfig of
- NoChunks -> flip catchNonAsync (\e -> print e >> return False) $ do
+ NoChunks -> flip catchNonAsync (\e -> warningIO (show e) >> return False) $ do
storehttp tmpurl b
finalizer tmpurl keyurl
return True
@@ -140,7 +136,7 @@ retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex
retrieve r k _f d p = metered (Just p) k $ \meterupdate ->
davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $
withStoredFiles r k baseurl user pass onerr $ \urls -> do
- meteredWriteFileChunks meterupdate d urls $ \url -> do
+ Legacy.meteredWriteFileChunks meterupdate d urls $ \url -> do
mb <- getDAV url user pass
case mb of
Nothing -> throwIO "download failed"
@@ -308,57 +304,37 @@ debugDAV :: DavUrl -> String -> IO ()
debugDAV msg url = debugM "DAV" $ msg ++ " " ++ url
{---------------------------------------------------------------------
- - Low-level DAV operations, using the new DAV monad when available.
+ - Low-level DAV operations.
---------------------------------------------------------------------}
putDAV :: DavUrl -> DavUser -> DavPass -> L.ByteString -> IO ()
putDAV url user pass b = do
debugDAV "PUT" url
-#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass $ putContentM (contentType, b)
-#else
- putContent url user pass (contentType, b)
-#endif
getDAV :: DavUrl -> DavUser -> DavPass -> IO (Maybe L.ByteString)
getDAV url user pass = do
debugDAV "GET" url
eitherToMaybe <$> tryNonAsync go
where
-#if MIN_VERSION_DAV(0,6,0)
go = goDAV url user pass $ snd <$> getContentM
-#else
- go = snd . snd <$> getPropsAndContent url user pass
-#endif
deleteDAV :: DavUrl -> DavUser -> DavPass -> IO ()
deleteDAV url user pass = do
debugDAV "DELETE" url
-#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass delContentM
-#else
- deleteContent url user pass
-#endif
moveDAV :: DavUrl -> DavUrl -> DavUser -> DavPass -> IO ()
moveDAV url newurl user pass = do
debugDAV ("MOVE to " ++ newurl ++ " from ") url
-#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass $ moveContentM newurl'
-#else
- moveContent url newurl' user pass
-#endif
where
newurl' = B8.fromString newurl
mkdirDAV :: DavUrl -> DavUser -> DavPass -> IO Bool
mkdirDAV url user pass = do
debugDAV "MKDIR" url
-#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass mkCol
-#else
- makeCollection url user pass
-#endif
existsDAV :: DavUrl -> DavUser -> DavPass -> IO (Either String Bool)
existsDAV url user pass = do
@@ -366,35 +342,19 @@ existsDAV url user pass = do
either (Left . show) id <$> tryNonAsync check
where
ispresent = return . Right
-#if MIN_VERSION_DAV(0,6,0)
check = goDAV url user pass $ do
setDepth Nothing
EL.catchJust
(matchStatusCodeException notFound404)
(getPropsM >> ispresent True)
(const $ ispresent False)
-#else
- check = E.catchJust
- (matchStatusCodeException notFound404)
-#if ! MIN_VERSION_DAV(0,4,0)
- (getProps url user pass >> ispresent True)
-#else
- (getProps url user pass Nothing >> ispresent True)
-#endif
- (const $ ispresent False)
-#endif
matchStatusCodeException :: Status -> HttpException -> Maybe ()
-#if MIN_VERSION_DAV(0,6,0)
matchStatusCodeException want (StatusCodeException s _ _)
-#else
-matchStatusCodeException want (StatusCodeException s _)
-#endif
| s == want = Just ()
| otherwise = Nothing
matchStatusCodeException _ _ = Nothing
-#if MIN_VERSION_DAV(0,6,0)
goDAV :: DavUrl -> DavUser -> DavPass -> DAVT IO a -> IO a
goDAV url user pass a = choke $ evalDAVT url $ do
setResponseTimeout Nothing -- disable default (5 second!) timeout
@@ -407,4 +367,3 @@ goDAV url user pass a = choke $ evalDAVT url $ do
case x of
Left e -> error e
Right r -> return r
-#endif
diff --git a/Types/Backend.hs b/Types/Backend.hs
index 7eb59b6e2..5c5855bc3 100644
--- a/Types/Backend.hs
+++ b/Types/Backend.hs
@@ -15,9 +15,16 @@ import Types.KeySource
data BackendA a = Backend
{ name :: String
, getKey :: KeySource -> a (Maybe Key)
+ -- Checks the content of a key.
, fsckKey :: Maybe (Key -> FilePath -> a Bool)
+ -- Checks if a key can be upgraded to a better form.
, canUpgradeKey :: Maybe (Key -> Bool)
+ -- Checks if there is a fast way to migrate a key to a different
+ -- backend (ie, without re-hashing).
, fastMigrate :: Maybe (Key -> BackendA a -> Maybe Key)
+ -- Checks if a key is known (or assumed) to always refer to the
+ -- same data.
+ , isStableKey :: Key -> Bool
}
instance Show (BackendA a) where
diff --git a/Types/Command.hs b/Types/Command.hs
index 0df7c82e6..1f8456194 100644
--- a/Types/Command.hs
+++ b/Types/Command.hs
@@ -69,6 +69,7 @@ data CommandSection
| SectionMetaData
| SectionUtility
| SectionPlumbing
+ | SectionTesting
deriving (Eq, Ord, Enum, Bounded)
descSection :: CommandSection -> String
@@ -79,3 +80,4 @@ descSection SectionQuery = "Query commands"
descSection SectionMetaData = "Metadata commands"
descSection SectionUtility = "Utility commands"
descSection SectionPlumbing = "Plumbing commands"
+descSection SectionTesting = "Testing commands"
diff --git a/Types/Key.hs b/Types/Key.hs
index 90f66f23e..154e813ff 100644
--- a/Types/Key.hs
+++ b/Types/Key.hs
@@ -13,6 +13,8 @@ module Types.Key (
stubKey,
key2file,
file2key,
+ nonChunkKey,
+ chunkKeyOffset,
prop_idempotent_key_encode,
prop_idempotent_key_decode
@@ -47,6 +49,19 @@ stubKey = Key
, keyChunkNum = Nothing
}
+-- Gets the parent of a chunk key.
+nonChunkKey :: Key -> Key
+nonChunkKey k = k
+ { keyChunkSize = Nothing
+ , keyChunkNum = Nothing
+ }
+
+-- Where a chunk key is offset within its parent.
+chunkKeyOffset :: Key -> Maybe Integer
+chunkKeyOffset k = (*)
+ <$> keyChunkSize k
+ <*> (pred <$> keyChunkNum k)
+
fieldSep :: Char
fieldSep = '-'
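Worked example of the new offset calculation (illustrative numbers): chunk numbers are 1-based, so the third 1 MiB chunk starts 2 MiB into the parent key's content:

    -- keyChunkSize k == Just 1048576, keyChunkNum k == Just 3
    chunkKeyOffset k  ==  Just (1048576 * (3 - 1))  ==  Just 2097152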
diff --git a/Types/Remote.hs b/Types/Remote.hs
index 2ddb68dfb..805b98474 100644
--- a/Types/Remote.hs
+++ b/Types/Remote.hs
@@ -56,7 +56,9 @@ data RemoteA a = Remote {
name :: RemoteName,
-- Remotes have a use cost; higher is more expensive
cost :: Cost,
- -- Transfers a key to the remote.
+ -- Transfers a key's contents from disk to the remote.
+ -- The key should not appear to be present on the remote until
+ -- all of its contents have been transferred.
storeKey :: Key -> AssociatedFile -> MeterUpdate -> a Bool,
-- Retrieves a key's contents to a file.
-- (The MeterUpdate does not need to be used if it retrieves
@@ -64,7 +66,7 @@ data RemoteA a = Remote {
retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> a Bool,
-- retrieves a key's contents to a tmp file, if it can be done cheaply
retrieveKeyFileCheap :: Key -> FilePath -> a Bool,
- -- removes a key's contents
+ -- removes a key's contents (succeeds if the contents are not present)
removeKey :: Key -> a Bool,
-- Checks if a key is present in the remote; if the remote
-- cannot be accessed returns a Left error message.
diff --git a/Types/StoreRetrieve.hs b/Types/StoreRetrieve.hs
new file mode 100644
index 000000000..33f66efb1
--- /dev/null
+++ b/Types/StoreRetrieve.hs
@@ -0,0 +1,37 @@
+{- Types for Storer and Retriever actions for remotes.
+ -
+ - Copyright 2014 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE Rank2Types #-}
+
+module Types.StoreRetrieve where
+
+import Common.Annex
+import Utility.Metered
+
+import qualified Data.ByteString.Lazy as L
+
+-- Prepares for and then runs an action that will act on a Key's
+-- content, passing it a helper when the preparation is successful.
+type Preparer helper = forall a. Key -> (Maybe helper -> Annex a) -> Annex a
+
+-- A source of a Key's content.
+data ContentSource
+ = FileContent FilePath
+ | ByteContent L.ByteString
+
+isByteContent :: ContentSource -> Bool
+isByteContent (ByteContent _) = True
+isByteContent (FileContent _) = False
+
+-- Action that stores a Key's content on a remote.
+-- Can throw exceptions.
+type Storer = Key -> ContentSource -> MeterUpdate -> Annex Bool
+
+-- Action that retrieves a Key's content from a remote, passing it to a
+-- callback.
+-- Throws exception if key is not present, or remote is not accessible.
+type Retriever = Key -> MeterUpdate -> (ContentSource -> Annex Bool) -> Annex Bool
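A minimal sketch of a Storer against these types (the root directory parameter is hypothetical; real remotes build theirs with the fileStorer/byteStorer helpers in Remote.Helper.ChunkedEncryptable):

    -- Stores a key's content under a local directory, handling both
    -- content sources and feeding the progress meter as it writes.
    toyStorer :: FilePath -> Storer
    toyStorer root k (ByteContent b) p = liftIO $ do
        meteredWriteFile p (root </> key2file k) b
        return True
    toyStorer root k (FileContent f) p = liftIO $ do
        b <- L.readFile f
        meteredWriteFile p (root </> key2file k) b
        return True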
diff --git a/Utility/Gpg.hs b/Utility/Gpg.hs
index a00bf99da..410259b11 100644
--- a/Utility/Gpg.hs
+++ b/Utility/Gpg.hs
@@ -11,14 +11,15 @@ module Utility.Gpg where
import Control.Applicative
import Control.Concurrent
+import Control.Monad.IO.Class
import qualified Data.Map as M
+import Control.Monad.Catch (bracket, MonadMask)
import Common
import qualified Build.SysConfig as SysConfig
#ifndef mingw32_HOST_OS
import System.Posix.Types
-import Control.Exception (bracket)
import System.Path
import Utility.Env
#else
@@ -104,18 +105,18 @@ pipeStrict params input = do
-
- Note that to avoid deadlock with the cleanup stage,
 - the reader must fully consume gpg's output before returning. -}
-feedRead :: [CommandParam] -> String -> (Handle -> IO ()) -> (Handle -> IO a) -> IO a
+feedRead :: (MonadIO m, MonadMask m) => [CommandParam] -> String -> (Handle -> IO ()) -> (Handle -> m a) -> m a
feedRead params passphrase feeder reader = do
#ifndef mingw32_HOST_OS
-- pipe the passphrase into gpg on a fd
- (frompipe, topipe) <- createPipe
- void $ forkIO $ do
+ (frompipe, topipe) <- liftIO createPipe
+ liftIO $ void $ forkIO $ do
toh <- fdToHandle topipe
hPutStrLn toh passphrase
hClose toh
let Fd pfd = frompipe
let passphrasefd = [Param "--passphrase-fd", Param $ show pfd]
- closeFd frompipe `after` go (passphrasefd ++ params)
+ liftIO (closeFd frompipe) `after` go (passphrasefd ++ params)
#else
-- store the passphrase in a temp file for gpg
withTmpFile "gpg" $ \tmpfile h -> do
@@ -128,15 +129,24 @@ feedRead params passphrase feeder reader = do
go params' = pipeLazy params' feeder reader
{- Like feedRead, but without passphrase. -}
-pipeLazy :: [CommandParam] -> (Handle -> IO ()) -> (Handle -> IO a) -> IO a
+pipeLazy :: (MonadIO m, MonadMask m) => [CommandParam] -> (Handle -> IO ()) -> (Handle -> m a) -> m a
pipeLazy params feeder reader = do
- params' <- stdParams $ Param "--batch" : params
- withBothHandles createProcessSuccess (proc gpgcmd params')
- $ \(to, from) -> do
- void $ forkIO $ do
- feeder to
- hClose to
- reader from
+ params' <- liftIO $ stdParams $ Param "--batch" : params
+ let p = (proc gpgcmd params')
+ { std_in = CreatePipe
+ , std_out = CreatePipe
+ , std_err = Inherit
+ }
+ bracket (setup p) (cleanup p) go
+ where
+ setup = liftIO . createProcess
+ cleanup p (_, _, _, pid) = liftIO $ forceSuccessProcess p pid
+ go p = do
+ let (to, from) = bothHandles p
+ liftIO $ void $ forkIO $ do
+ feeder to
+ hClose to
+ reader from
{- Finds gpg public keys matching some string. (Could be an email address,
- a key id, or a name; See the section 'HOW TO SPECIFY A USER ID' of
diff --git a/Utility/Metered.hs b/Utility/Metered.hs
index 0d94c1c5c..4618aecfe 100644
--- a/Utility/Metered.hs
+++ b/Utility/Metered.hs
@@ -16,6 +16,7 @@ import qualified Data.ByteString as S
import System.IO.Unsafe
import Foreign.Storable (Storable(sizeOf))
import System.Posix.Types
+import Data.Int
{- An action that can be run repeatedly, updating it on the bytes processed.
-
@@ -23,6 +24,9 @@ import System.Posix.Types
- far, *not* an incremental amount since the last call. -}
type MeterUpdate = (BytesProcessed -> IO ())
+nullMeterUpdate :: MeterUpdate
+nullMeterUpdate _ = return ()
+
{- Total number of bytes processed so far. -}
newtype BytesProcessed = BytesProcessed Integer
deriving (Eq, Ord, Show)
@@ -31,6 +35,10 @@ class AsBytesProcessed a where
toBytesProcessed :: a -> BytesProcessed
fromBytesProcessed :: BytesProcessed -> a
+instance AsBytesProcessed BytesProcessed where
+ toBytesProcessed = id
+ fromBytesProcessed = id
+
instance AsBytesProcessed Integer where
toBytesProcessed i = BytesProcessed i
fromBytesProcessed (BytesProcessed i) = i
@@ -39,6 +47,10 @@ instance AsBytesProcessed Int where
toBytesProcessed i = BytesProcessed $ toInteger i
fromBytesProcessed (BytesProcessed i) = fromInteger i
+instance AsBytesProcessed Int64 where
+ toBytesProcessed i = BytesProcessed $ toInteger i
+ fromBytesProcessed (BytesProcessed i) = fromInteger i
+
instance AsBytesProcessed FileOffset where
toBytesProcessed sz = BytesProcessed $ toInteger sz
fromBytesProcessed (BytesProcessed sz) = fromInteger sz
@@ -77,6 +89,13 @@ meteredWriteFile :: MeterUpdate -> FilePath -> L.ByteString -> IO ()
meteredWriteFile meterupdate f b = withBinaryFile f WriteMode $ \h ->
meteredWrite meterupdate h b
+{- Applies an offset to a MeterUpdate. This can be useful when
+ - performing a sequence of actions, such as multiple meteredWriteFiles,
+ - that all update a common meter progressively, or when resuming.
+ -}
+offsetMeterUpdate :: MeterUpdate -> BytesProcessed -> MeterUpdate
+offsetMeterUpdate base offset = \n -> base (offset `addBytesProcessed` n)
+
{- This is like L.hGetContents, but after each chunk is read, a meter
- is updated based on the size of the chunk.
-
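For instance (a hedged sketch), a download resuming after have bytes are already present can keep the overall meter accurate by offsetting it before writing the rest:

    resumeWrite :: MeterUpdate -> FilePath -> Integer -> L.ByteString -> IO ()
    resumeWrite meterupdate dest have rest = do
        -- the base meter is told have + n, not just the newly written bytes
        let meterupdate' = offsetMeterUpdate meterupdate (toBytesProcessed have)
        withBinaryFile dest AppendMode $ \h ->
            meteredWrite meterupdate' h rest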
diff --git a/Utility/Process.hs b/Utility/Process.hs
index 1f722af81..e25618eba 100644
--- a/Utility/Process.hs
+++ b/Utility/Process.hs
@@ -31,6 +31,7 @@ module Utility.Process (
stdinHandle,
stdoutHandle,
stderrHandle,
+ bothHandles,
processHandle,
devNull,
) where
diff --git a/Utility/Tmp.hs b/Utility/Tmp.hs
index bed30bb4d..7da5cc284 100644
--- a/Utility/Tmp.hs
+++ b/Utility/Tmp.hs
@@ -9,11 +9,12 @@
module Utility.Tmp where
-import Control.Exception (bracket)
import System.IO
import System.Directory
import Control.Monad.IfElse
import System.FilePath
+import Control.Monad.IO.Class
+import Control.Monad.Catch (bracket, MonadMask)
import Utility.Exception
import Utility.FileSystemEncoding
@@ -42,18 +43,18 @@ viaTmp a file content = bracket setup cleanup use
{- Runs an action with a tmp file located in the system's tmp directory
- (or in "." if there is none) then removes the file. -}
-withTmpFile :: Template -> (FilePath -> Handle -> IO a) -> IO a
+withTmpFile :: (MonadIO m, MonadMask m) => Template -> (FilePath -> Handle -> m a) -> m a
withTmpFile template a = do
- tmpdir <- catchDefaultIO "." getTemporaryDirectory
+ tmpdir <- liftIO $ catchDefaultIO "." getTemporaryDirectory
withTmpFileIn tmpdir template a
{- Runs an action with a tmp file located in the specified directory,
- then removes the file. -}
-withTmpFileIn :: FilePath -> Template -> (FilePath -> Handle -> IO a) -> IO a
+withTmpFileIn :: (MonadIO m, MonadMask m) => FilePath -> Template -> (FilePath -> Handle -> m a) -> m a
withTmpFileIn tmpdir template a = bracket create remove use
where
- create = openTempFile tmpdir template
- remove (name, handle) = do
+ create = liftIO $ openTempFile tmpdir template
+ remove (name, handle) = liftIO $ do
hClose handle
catchBoolIO (removeFile name >> return True)
use (name, handle) = a name handle
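With this generalization, withTmpFile can be used directly from monads like Annex (and from the gpg helpers above), not just IO; a hedged sketch:

    -- runs in any MonadIO + MonadMask monad, e.g. Annex
    scratch :: Annex String
    scratch = withTmpFile "scratch" $ \f h -> do
        liftIO $ hPutStrLn h "temporary data"
        liftIO $ hClose h
        liftIO $ readFile f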
diff --git a/debian/changelog b/debian/changelog
index 7b743b711..3783c2655 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,5 +1,12 @@
git-annex (5.20140718) UNRELEASED; urgency=medium
+ * New chunk= option to chunk files stored in special remotes.
+ Currently supported by: directory, and all external special remotes.
+ * Partially transferred files are automatically resumed when using
+ chunked remotes!
+ * The old chunksize= option is deprecated. Do not use for new remotes.
+ * Legacy code for directory remotes using the old chunksize= option
+ will keep them working, but more slowly than before.
* webapp: Automatically install Konqueror integration scripts
to get and drop files.
* repair: Removing bad objects could leave fsck finding no more
@@ -8,6 +15,8 @@ git-annex (5.20140718) UNRELEASED; urgency=medium
were incompletely repaired before.
* Fix cost calculation for non-encrypted remotes.
* Display exception message when a transfer fails due to an exception.
+ * WebDAV: Dropped support for DAV before 0.6.1.
+ * testremote: New command to test uploads/downloads to a remote.
-- Joey Hess <joeyh@debian.org> Mon, 21 Jul 2014 14:41:26 -0400
diff --git a/doc/chunking.mdwn b/doc/chunking.mdwn
new file mode 100644
index 000000000..87408f8e1
--- /dev/null
+++ b/doc/chunking.mdwn
@@ -0,0 +1,31 @@
+Some [[special_remotes]] have support for breaking large files up into
+chunks that are stored on the remote.
+
+This can be useful to work around limitations on the size of files
+on the remote.
+
+Chunking also allows for resuming interrupted downloads and uploads.
+
+Note that git-annex has to buffer chunks in memory before they are sent to
+a remote. So, using a large chunk size will make it use more memory.
+
+To enable chunking, pass a `chunk=nnMiB` parameter to `git annex
+initremote`, specifying the chunk size.
+
+Good chunk sizes will depend on the remote, but a good starting place
+is probably `1MiB`. Very large chunks are problematic, both because
+git-annex needs to buffer one chunk in memory when uploading, and because
+a larger chunk will make resuming interrupted transfers less efficient.
+On the other hand, when a file is split into a great many chunks,
+there is increased overhead from making many requests to the remote.
+
+To disable chunking of a remote that was using chunking,
+pass `chunk=0` to `git annex enableremote`. Any content already stored on
+the remote using chunks will continue to be accessed via chunks; this
+just prevents using chunks when storing new content.
+
+To change the chunk size, pass a `chunk=nnMiB` parameter to
+`git annex enableremote`. This only affects the chunk size used when
+storing new content.
+
+See also: [[design document|design/assistant/chunks]]
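For example (illustrative name and size), a directory special remote using 1 MiB chunks could be set up with:

    git annex initremote usbdrive type=directory directory=/media/usb chunk=1MiB encryption=none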
diff --git a/doc/design/assistant/chunks.mdwn b/doc/design/assistant/chunks.mdwn
index 48a1876e4..a9709a778 100644
--- a/doc/design/assistant/chunks.mdwn
+++ b/doc/design/assistant/chunks.mdwn
@@ -231,6 +231,15 @@ cannot check exact file sizes.
If padding is enabled, gpg compression should be disabled, to not leak
clues about how well the files compress and so what kind of file it is.
+## chunk key hashing
+
+A chunk key should hash into the same directory structure as its parent
+key. This will avoid lots of extra hash directories when using chunking
+with non-encrypted keys.
+
+Won't happen when the key is encrypted, but that is good; hashing to the
+same bucket then would allow statistical correlation.
+
## resuming interrupted transfers
Both interrupted downloads and interrupted uploads can be resumed.
diff --git a/doc/design/external_special_remote_protocol.mdwn b/doc/design/external_special_remote_protocol.mdwn
index 6fe09ff7c..01ffe7fd4 100644
--- a/doc/design/external_special_remote_protocol.mdwn
+++ b/doc/design/external_special_remote_protocol.mdwn
@@ -101,12 +101,14 @@ The following requests *must* all be supported by the special remote.
Tells the special remote it's time to prepare itself to be used.
Only INITREMOTE can come before this.
* `TRANSFER STORE|RETRIEVE Key File`
- Requests the transfer of a key. For Send, the File is the file to upload;
- for Receive the File is where to store the download.
+ Requests the transfer of a key. For STORE, the File is the file to upload;
+ for RETRIEVE the File is where to store the download.
Note that the File should not influence the filename used on the remote.
The filename will not contain any whitespace.
+ Note that it's important that, while a Key is being stored, CHECKPRESENT
+ not indicate it's present until all the data has been transferred.
Multiple transfers might be requested by git-annex, but it's fine for the
- program to serialize them and only do one at a time.
+ program to serialize them and only do one at a time.
* `CHECKPRESENT Key`
Requests the remote check if a key is present in it.
* `REMOVE Key`
@@ -286,7 +288,6 @@ start a new process the next time it needs to use a remote.
the remote. However, \n and probably \0 need to be escaped somehow in the
file data, which adds complication.
* uuid discovery during INITREMOTE.
-* Support for splitting files into chunks.
* Support for getting and setting the list of urls that can be associated
with a key.
* Hook into webapp. Needs a way to provide some kind of prompt to the user
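To make the CHECKPRESENT note above concrete, a hypothetical exchange while storing one chunk (key name and sizes illustrative); the remote should only answer CHECKPRESENT-SUCCESS for this key once TRANSFER-SUCCESS has been sent:

    TRANSFER STORE SHA256E-s1048576-S1048576-C1--d4f2... /path/to/tmpfile
    PROGRESS 524288
    PROGRESS 1048576
    TRANSFER-SUCCESS STORE SHA256E-s1048576-S1048576-C1--d4f2...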
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 8ba3558d3..ba851eef8 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -949,19 +949,42 @@ subdirectories).
Merge conflicts between two files that are not annexed will not be
automatically resolved.
+* `remotedaemon`
+
+ Detects when network remotes have received git pushes and fetches from them.
+
+* `xmppgit`
+
+ This command is used internally to perform git pulls over XMPP.
+
+# TESTING COMMANDS
+
* `test`
This runs git-annex's built-in test suite.
There are several parameters, provided by Haskell's tasty test framework.
+ Pass --help for details.
-* `remotedaemon`
+* `testremote remote`
- Detects when network remotes have received git pushes and fetches from them.
+ This tests a remote by generating some random objects and sending them to
+ the remote, then redownloading them, removing them from the remote, etc.
-* `xmppgit`
+ It's safe to run in an existing repository (the repository contents are
+ not altered), although it may perform expensive data transfers.
- This command is used internally to perform git pulls over XMPP.
+ The --size option can be used to tune the size of the generated objects.
+
+ Testing a remote uses the remote's configuration, automatically
+ varying the chunk sizes, and testing with simple shared encryption
+ both enabled and disabled.
+
+* `fuzztest`
+
+ Generates random changes to files in the current repository,
+ for use in testing the assistant. This is dangerous, so it will not
+ do anything unless --forced.
# OPTIONS
diff --git a/doc/internals/hashing.mdwn b/doc/internals/hashing.mdwn
index cc4bc6456..bdc259b63 100644
--- a/doc/internals/hashing.mdwn
+++ b/doc/internals/hashing.mdwn
@@ -36,3 +36,8 @@ string, but where that would normally encode the bits using the 16 characters
0-9a-f, this instead uses the 32 characters "0123456789zqjxkmvwgpfZQJXKMVWGPF".
The first 2 letters of the resulting string are the first directory, and the
second 2 are the second directory.
+
+## chunk keys
+
+The same hash directory is used for a chunk key as would be used for the
+key that it's a chunk of.
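For example (hypothetical key names and directories), a parent key and its second chunk land in the same two-level hash directory:

    SHA256-s4194304--0123abcd             -> f8/7e/
    SHA256-s4194304-S1048576-C2--0123abcd -> f8/7e/  (same directories)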
diff --git a/doc/special_remotes/directory.mdwn b/doc/special_remotes/directory.mdwn
index 96d593821..6279024ec 100644
--- a/doc/special_remotes/directory.mdwn
+++ b/doc/special_remotes/directory.mdwn
@@ -25,13 +25,11 @@ remote:
* `keyid` - Specifies the gpg key to use for [[encryption]].
-* `chunksize` - Avoid storing files larger than the specified size in the
- directory. For use on directories on mount points that have file size
- limitations. The default is to never chunk files.
- The value can use specified using any commonly used units.
- Example: `chunksize=100 megabytes`
- Note that enabling chunking on an existing remote with non-chunked
- files is not recommended; nor is changing the chunksize.
+* `chunk` - Enables [[chunking]] when storing large files.
+
+* `chunksize` - Deprecated version of chunk parameter above.
+ Do not use for new remotes. It is not safe to change the chunksize
+ setting of an existing remote.
Setup example:
diff --git a/doc/special_remotes/external/example.sh b/doc/special_remotes/external/example.sh
index 5152ccc28..8fed9f4aa 100755
--- a/doc/special_remotes/external/example.sh
+++ b/doc/special_remotes/external/example.sh
@@ -128,14 +128,23 @@ while read line; do
STORE)
# Store the file to a location
# based on the key.
- # XXX when possible, send PROGRESS
+ # XXX when at all possible, send PROGRESS
calclocation "$key"
mkdir -p "$(dirname "$LOC")"
- if runcmd cp "$file" "$LOC"; then
+ # Store in temp file first, so that
+ # CHECKPRESENT does not see it
+ # until it is all stored.
+ mkdir -p "$mydirectory/tmp"
+ tmp="$mydirectory/tmp/$key"
+ if runcmd cp "$file" "$tmp" \
+ && runcmd mv -f "$tmp" "$LOC"; then
echo TRANSFER-SUCCESS STORE "$key"
else
echo TRANSFER-FAILURE STORE "$key"
fi
+
+ mkdir -p "$(dirname "$LOC")"
+ # The file may already exist, so
+ # make sure we can overwrite it.
+ chmod 644 "$LOC" 2>/dev/null || true
;;
RETRIEVE)
# Retrieve from a location based on
diff --git a/doc/special_remotes/webdav.mdwn b/doc/special_remotes/webdav.mdwn
index 871540a97..64eed5d0b 100644
--- a/doc/special_remotes/webdav.mdwn
+++ b/doc/special_remotes/webdav.mdwn
@@ -29,13 +29,11 @@ the webdav remote.
be created as needed. Use of a https URL is strongly
encouraged, since HTTP basic authentication is used.
-* `chunksize` - Avoid storing files larger than the specified size in
- WebDAV. For use when the WebDAV server has file size
- limitations. The default is to never chunk files.
- The value can use specified using any commonly used units.
- Example: `chunksize=75 megabytes`
- Note that enabling chunking on an existing remote with non-chunked
- files is not recommended, nor is changing the chunksize.
+* `chunk` - Enables [[chunking]] when storing large files.
+
+* `chunksize` - Deprecated version of chunk parameter above.
+ Do not use for new remotes. It is not safe to change the chunksize
+ setting of an existing remote.
Setup example:
diff --git a/doc/tips/using_box.com_as_a_special_remote.mdwn b/doc/tips/using_box.com_as_a_special_remote.mdwn
index ac59834f5..149d1f824 100644
--- a/doc/tips/using_box.com_as_a_special_remote.mdwn
+++ b/doc/tips/using_box.com_as_a_special_remote.mdwn
@@ -5,9 +5,9 @@ for providing 50 gb of free storage if you sign up with its Android client.
git-annex can use Box as a [[special remote|special_remotes]].
Recent versions of git-annex make this very easy to set up:
- WEBDAV_USERNAME=you@example.com WEBDAV_PASSWORD=xxxxxxx git annex initremote box.com type=webdav url=https://dav.box.com/dav/git-annex chunksize=75mb encryption=shared
+ WEBDAV_USERNAME=you@example.com WEBDAV_PASSWORD=xxxxxxx git annex initremote box.com type=webdav url=https://dav.box.com/dav/git-annex chunk=50mb encryption=shared
-Note the use of chunksize; Box has a 100 mb maximum file size, and this
+Note the use of [[chunking]]; Box has a 100 mb maximum file size, and this
breaks up large files into chunks before that limit is reached.
# old davfs2 method
@@ -58,7 +58,7 @@ Create the special remote, in your git-annex repository.
** This example is non-encrypted; fill in your gpg key ID for a securely
encrypted special remote! **
- git annex initremote box.com type=directory directory=/media/box.com chunksize=2mb encryption=none
+ git annex initremote box.com type=directory directory=/media/box.com chunk=2mb encryption=none
Now git-annex can copy files to box.com, get files from it, etc, just like
with any other special remote.
diff --git a/git-annex.cabal b/git-annex.cabal
index ba23d281e..2a39489d4 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -124,7 +124,7 @@ Executable git-annex
if flag(TestSuite)
Build-Depends: tasty (>= 0.7), tasty-hunit, tasty-quickcheck, tasty-rerun,
- optparse-applicative
+ optparse-applicative, crypto-api
CPP-Options: -DWITH_TESTSUITE
if flag(TDFA)
@@ -142,7 +142,7 @@ Executable git-annex
CPP-Options: -DWITH_S3
if flag(WebDAV)
- Build-Depends: DAV ((>= 0.3 && < 0.6) || > 0.6),
+ Build-Depends: DAV (> 0.6),
http-client, http-conduit, http-types, lifted-base
CPP-Options: -DWITH_WEBDAV