summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2015-10-01 15:54:37 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2015-10-01 15:56:39 -0400
commitb9fe55705f19fc39889da6157714039047aed4c9 (patch)
tree107888b257bfcf370353bce2969897046be3af33
parent55d635e356ecae2dd90d8cea355656faf3b24db1 (diff)
Do verification of checksums of annex objects downloaded from remotes.
* When annex objects are received into git repositories, their checksums are verified then too.
* To get the old, faster, behavior of not verifying checksums, set annex.verify=false, or remote.<name>.annex-verify=false.
* setkey, rekey: These commands also now verify that the provided file matches the key, unless annex.verify=false.
* reinject: Already verified content; this can now be disabled by setting annex.verify=false.

recvkey and reinject already did verification, so the now-duplicate code was removed from them. fsck still does its own verification, which is ok since it does not use getViaTmp, so verification doesn't happen twice when using fsck --from.
-rw-r--r--Annex/Content.hs63
-rw-r--r--Command/Get.hs20
-rw-r--r--Command/Move.hs3
-rw-r--r--Command/ReKey.hs2
-rw-r--r--Command/RecvKey.hs52
-rw-r--r--Command/Reinject.hs28
-rw-r--r--Command/SetKey.hs2
-rw-r--r--Command/Sync.hs4
-rw-r--r--Command/TestRemote.hs6
-rw-r--r--Command/TransferKey.hs3
-rw-r--r--Command/TransferKeys.hs3
-rw-r--r--Remote/Git.hs2
-rw-r--r--Types/GitConfig.hs4
-rw-r--r--debian/changelog14
-rw-r--r--doc/git-annex-reinject.mdwn4
-rw-r--r--doc/git-annex-setkey.mdwn3
-rw-r--r--doc/git-annex.mdwn6
-rw-r--r--doc/todo/checksum_verification_on_transfer/comment_3_2fa9445619032a378264de8b59958c60._comment17
18 files changed, 137 insertions, 99 deletions
diff --git a/Annex/Content.hs b/Annex/Content.hs
index 32c164417..679b7e6b7 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -16,6 +16,7 @@ module Annex.Content (
getViaTmp,
getViaTmp',
checkDiskSpaceToGet,
+ Verify(..),
prepTmp,
withTmp,
checkDiskSpace,
@@ -61,6 +62,9 @@ import Annex.Content.Direct
import Annex.ReplaceFile
import Utility.LockPool
import Messages.Progress
+import qualified Types.Remote
+import qualified Types.Backend
+import qualified Backend
{- Checks if a given key's content is currently present. -}
inAnnex :: Key -> Annex Bool
@@ -214,25 +218,64 @@ lockContent key a = do
{- Runs an action, passing it the temp file to get,
- and if the action succeeds, verifies the file matches
- the key and moves the file into the annex as a key's content. -}
-getViaTmp :: Key -> (FilePath -> Annex Bool) -> Annex Bool
-getViaTmp key action = checkDiskSpaceToGet key False $ getViaTmp' key action
+getViaTmp :: Verify -> Key -> (FilePath -> Annex Bool) -> Annex Bool
+getViaTmp v key action = checkDiskSpaceToGet key False $
+ getViaTmp' v key action
{- Like getViaTmp, but does not check that there is enough disk space
- for the incoming key. For use when the key content is already on disk
- and not being copied into place. -}
-getViaTmp' :: Key -> (FilePath -> Annex Bool) -> Annex Bool
-getViaTmp' key action = do
+getViaTmp' :: Verify -> Key -> (FilePath -> Annex Bool) -> Annex Bool
+getViaTmp' v key action = do
tmpfile <- prepTmp key
ifM (action tmpfile)
- ( do
- moveAnnex key tmpfile
- logStatus key InfoPresent
- return True
- -- the tmp file is left behind, in case caller wants
- -- to resume its transfer
+ ( ifM (verifyKeyContent v key tmpfile)
+ ( do
+ moveAnnex key tmpfile
+ logStatus key InfoPresent
+ return True
+ , do
+ warning "verification of content failed"
+ liftIO $ nukeFile tmpfile
+ return False
+ )
+ -- On transfer failure, the tmp file is left behind, in case
+ -- caller wants to resume its transfer
, return False
)
+{- Verifies that a file is the expected content of a key.
+ -
+ - Most keys have a known size, and if so, the file size is checked.
+ - This is not expensive, so is always done.
+ -
+ - When the key's backend allows verifying the content (eg via checksum),
+ - it is checked. This is an expensive check, so configuration can prevent
+ - it, for either a particular remote or always.
+ -}
+verifyKeyContent :: Verify -> Key -> FilePath -> Annex Bool
+verifyKeyContent v k f = verifysize <&&> verifycontent
+ where
+ verifysize = case Types.Key.keySize k of
+ Nothing -> return True
+ Just size -> do
+ size' <- liftIO $ catchDefaultIO 0 $ getFileSize f
+ return (size' == size)
+ verifycontent = ifM (shouldVerify v)
+ ( case Types.Backend.verifyKeyContent =<< Backend.maybeLookupBackendName (Types.Key.keyBackendName k) of
+ Nothing -> return True
+ Just verifier -> verifier k f
+ , return True
+ )
+
+data Verify = AlwaysVerify | RemoteVerify Remote | DefaultVerify
+
+shouldVerify :: Verify -> Annex Bool
+shouldVerify AlwaysVerify = return True
+shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig
+shouldVerify (RemoteVerify r) = shouldVerify DefaultVerify
+ <&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r))
+
{- Checks if there is enough free disk space to download a key
- to its temp file.
-
diff --git a/Command/Get.hs b/Command/Get.hs
index 324ff2752..58fbefed2 100644
--- a/Command/Get.hs
+++ b/Command/Get.hs
@@ -68,17 +68,16 @@ start' expensivecheck from key afile = stopUnless (not <$> inAnnex key) $
next a
perform :: Key -> AssociatedFile -> CommandPerform
-perform key afile = stopUnless (getViaTmp key $ getKeyFile key afile) $
+perform key afile = stopUnless (getKey key afile) $
next $ return True -- no cleanup needed
{- Try to find a copy of the file in one of the remotes,
- and copy it to here. -}
-getKeyFile :: Key -> AssociatedFile -> FilePath -> Annex Bool
-getKeyFile key afile dest = getKeyFile' key afile dest
- =<< Remote.keyPossibilities key
+getKey :: Key -> AssociatedFile -> Annex Bool
+getKey key afile = getKey' key afile =<< Remote.keyPossibilities key
-getKeyFile' :: Key -> AssociatedFile -> FilePath -> [Remote] -> Annex Bool
-getKeyFile' key afile dest = dispatch
+getKey' :: Key -> AssociatedFile -> [Remote] -> Annex Bool
+getKey' key afile = dispatch
where
dispatch [] = do
showNote "not available"
@@ -102,6 +101,9 @@ getKeyFile' key afile dest = dispatch
| Remote.hasKeyCheap r =
either (const False) id <$> Remote.hasKey r key
| otherwise = return True
- docopy r = download (Remote.uuid r) key afile noRetry noObserver $ \p -> do
- showAction $ "from " ++ Remote.name r
- Remote.retrieveKeyFile r key afile dest p
+ docopy r witness = getViaTmp (RemoteVerify r) key $ \dest ->
+ download (Remote.uuid r) key afile noRetry noObserver
+ (\p -> do
+ showAction $ "from " ++ Remote.name r
+ Remote.retrieveKeyFile r key afile dest p
+ ) witness
diff --git a/Command/Move.hs b/Command/Move.hs
index d95bce6ab..a83ea04dd 100644
--- a/Command/Move.hs
+++ b/Command/Move.hs
@@ -166,7 +166,8 @@ fromPerform src move key afile = ifM (inAnnex key)
go = notifyTransfer Download afile $
download (Remote.uuid src) key afile noRetry noObserver $ \p -> do
showAction $ "from " ++ Remote.name src
- getViaTmp key $ \t -> Remote.retrieveKeyFile src key afile t p
+ getViaTmp (RemoteVerify src) key $ \t ->
+ Remote.retrieveKeyFile src key afile t p
dispatch _ False = stop -- failed
dispatch False True = next $ return True -- copy complete
dispatch True True = do -- finish moving
diff --git a/Command/ReKey.hs b/Command/ReKey.hs
index e38ce3c50..9084814fa 100644
--- a/Command/ReKey.hs
+++ b/Command/ReKey.hs
@@ -52,7 +52,7 @@ perform file oldkey newkey = do
{- Make a hard link to the old key content (when supported),
- to avoid wasting disk space. -}
linkKey :: Key -> Key -> Annex Bool
-linkKey oldkey newkey = getViaTmp' newkey $ \tmp -> do
+linkKey oldkey newkey = getViaTmp' DefaultVerify newkey $ \tmp -> do
src <- calcRepo $ gitAnnexLocation oldkey
liftIO $ ifM (doesFileExist tmp)
( return True
diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs
index 7477bb879..3a8747534 100644
--- a/Command/RecvKey.hs
+++ b/Command/RecvKey.hs
@@ -16,9 +16,6 @@ import Utility.Rsync
import Logs.Transfer
import Command.SendKey (fieldTransfer)
import qualified CmdLine.GitAnnexShell.Fields as Fields
-import qualified Types.Key
-import qualified Types.Backend
-import qualified Backend
cmd :: Command
cmd = noCommit $ command "recvkey" SectionPlumbing
@@ -29,8 +26,12 @@ seek :: CmdParams -> CommandSeek
seek = withKeys start
start :: Key -> CommandStart
-start key = fieldTransfer Download key $ \_p ->
- ifM (getViaTmp key go)
+start key = fieldTransfer Download key $ \_p -> do
+ -- Always verify content when a direct mode repo is sending a file,
+ -- as the file could change while being transferred.
+ fromdirect <- isJust <$> Fields.getField Fields.direct
+ let verify = if fromdirect then AlwaysVerify else DefaultVerify
+ ifM (getViaTmp verify key go)
( do
-- forcibly quit after receiving one key,
-- and shutdown cleanly
@@ -42,43 +43,4 @@ start key = fieldTransfer Download key $ \_p ->
go tmp = do
opts <- filterRsyncSafeOptions . maybe [] words
<$> getField "RsyncOptions"
- ok <- liftIO $ rsyncServerReceive (map Param opts) tmp
-
- -- The file could have been received with permissions that
- -- do not allow reading it, so this is done before the
- -- directcheck.
- freezeContent tmp
-
- if ok
- then ifM (isJust <$> Fields.getField Fields.direct)
- ( directcheck tmp
- , return True
- )
- else return False
- {- If the sending repository uses direct mode, the file
- - it sends could be modified as it's sending it. So check
- - that the right size file was received, and that the key/value
- - Backend is happy with it. -}
- directcheck tmp = do
- oksize <- case Types.Key.keySize key of
- Nothing -> return True
- Just size -> do
- size' <- liftIO $ getFileSize tmp
- return $ size == size'
- if oksize
- then case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of
- Nothing -> do
- warning "recvkey: received key from direct mode repository using unknown backend; cannot check; discarding"
- return False
- Just backend -> maybe (return True) runverify
- (Types.Backend.verifyKeyContent backend)
- else do
- warning "recvkey: received key with wrong size; discarding"
- return False
- where
- runverify check = ifM (check key tmp)
- ( return True
- , do
- warning "recvkey: received key from direct mode repository seems to have changed as it was transferred; discarding"
- return False
- )
+ liftIO $ rsyncServerReceive (map Param opts) tmp
diff --git a/Command/Reinject.hs b/Command/Reinject.hs
index 76e1420ff..90ddc1c2a 100644
--- a/Command/Reinject.hs
+++ b/Command/Reinject.hs
@@ -11,8 +11,6 @@ import Common.Annex
import Command
import Logs.Location
import Annex.Content
-import qualified Command.Fsck
-import qualified Backend
cmd :: Command
cmd = command "reinject" SectionUtility
@@ -36,29 +34,19 @@ start (src:dest:[])
start _ = error "specify a src file and a dest file"
perform :: FilePath -> FilePath -> Key -> CommandPerform
-perform src dest key = do
- {- Check the content before accepting it. -}
- v <- Backend.getBackend dest key
- case v of
- Nothing -> stop
- Just backend ->
- ifM (Command.Fsck.checkKeySizeOr reject key src
- <&&> Command.Fsck.checkBackendOr reject backend key src)
- ( do
- unlessM move $ error "mv failed!"
- next $ cleanup key
- , error "not reinjecting"
- )
+perform src _dest key = ifM move
+ ( next $ cleanup key
+ , error "failed"
+ )
where
- -- the file might be on a different filesystem,
+ -- The file might be on a different filesystem,
-- so moveFile is used rather than simply calling
- -- moveToObjectDir; disk space is also
- -- checked this way.
- move = getViaTmp key $ \tmp ->
+ -- moveToObjectDir; disk space is also checked this way,
+ -- and the file's content is verified to match the key.
+ move = getViaTmp DefaultVerify key $ \tmp ->
liftIO $ catchBoolIO $ do
moveFile src tmp
return True
- reject = const $ return "wrong file?"
cleanup :: Key -> CommandCleanup
cleanup key = do
diff --git a/Command/SetKey.hs b/Command/SetKey.hs
index d8216a0b4..319229482 100644
--- a/Command/SetKey.hs
+++ b/Command/SetKey.hs
@@ -35,7 +35,7 @@ perform file key = do
-- the file might be on a different filesystem, so moveFile is used
-- rather than simply calling moveAnnex; disk space is also
-- checked this way.
- ok <- getViaTmp key $ \dest ->
+ ok <- getViaTmp DefaultVerify key $ \dest ->
if dest /= file
then liftIO $ catchBoolIO $ do
moveFile file dest
diff --git a/Command/Sync.hs b/Command/Sync.hs
index 19a984300..964b45dc2 100644
--- a/Command/Sync.hs
+++ b/Command/Sync.hs
@@ -37,7 +37,7 @@ import qualified Remote.Git
import Config
import Annex.Wanted
import Annex.Content
-import Command.Get (getKeyFile')
+import Command.Get (getKey')
import qualified Command.Move
import Logs.Location
import Annex.Drop
@@ -476,7 +476,7 @@ syncFile ebloom rs af k = do
)
get have = includeCommandAction $ do
showStart' "get" k af
- next $ next $ getViaTmp k $ \dest -> getKeyFile' k af dest have
+ next $ next $ getKey' k af have
wantput r
| Remote.readonly r || remoteAnnexReadOnly (Remote.gitconfig r) = return False
diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs
index fbe83f2db..e4a9eb829 100644
--- a/Command/TestRemote.hs
+++ b/Command/TestRemote.hs
@@ -159,7 +159,7 @@ test st r k =
Just b -> case verifyKeyContent b of
Nothing -> return True
Just verifier -> verifier k (key2file k)
- get = getViaTmp k $ \dest ->
+ get = getViaTmp (RemoteVerify r) k $ \dest ->
Remote.retrieveKeyFile r k Nothing dest nullMeterUpdate
store = Remote.storeKey r k Nothing nullMeterUpdate
remove = Remote.removeKey r k
@@ -173,10 +173,10 @@ testUnavailable st r k =
, check (`notElem` [Right True, Right False]) "checkPresent" $
Remote.checkPresent r k
, check (== Right False) "retrieveKeyFile" $
- getViaTmp k $ \dest ->
+ getViaTmp (RemoteVerify r) k $ \dest ->
Remote.retrieveKeyFile r k Nothing dest nullMeterUpdate
, check (== Right False) "retrieveKeyFileCheap" $
- getViaTmp k $ \dest ->
+ getViaTmp (RemoteVerify r) k $ \dest ->
Remote.retrieveKeyFileCheap r k Nothing dest
]
where
diff --git a/Command/TransferKey.hs b/Command/TransferKey.hs
index 04dbc1799..56c9ec675 100644
--- a/Command/TransferKey.hs
+++ b/Command/TransferKey.hs
@@ -61,7 +61,8 @@ toPerform key file remote = go Upload file $
fromPerform :: Key -> AssociatedFile -> Remote -> CommandPerform
fromPerform key file remote = go Upload file $
download (uuid remote) key file forwardRetry noObserver $ \p ->
- getViaTmp key $ \t -> Remote.retrieveKeyFile remote key file t p
+ getViaTmp (RemoteVerify remote) key $
+ \t -> Remote.retrieveKeyFile remote key file t p
go :: Direction -> AssociatedFile -> (NotifyWitness -> Annex Bool) -> CommandPerform
go direction file a = notifyTransfer direction file a >>= liftIO . exitBool
diff --git a/Command/TransferKeys.hs b/Command/TransferKeys.hs
index 67f201024..4fb0d9069 100644
--- a/Command/TransferKeys.hs
+++ b/Command/TransferKeys.hs
@@ -43,7 +43,8 @@ start = do
return ok
| otherwise = notifyTransfer direction file $
download (Remote.uuid remote) key file forwardRetry observer $ \p ->
- getViaTmp key $ \t -> Remote.retrieveKeyFile remote key file t p
+ getViaTmp (RemoteVerify remote) key $ \t ->
+ Remote.retrieveKeyFile remote key file t p
observer False t tinfo = recordFailedTransfer t tinfo
observer True _ _ = noop
diff --git a/Remote/Git.hs b/Remote/Git.hs
index 1b5b2ab42..ada2055f2 100644
--- a/Remote/Git.hs
+++ b/Remote/Git.hs
@@ -502,7 +502,7 @@ copyToRemote' r key file p
ensureInitialized
runTransfer (Transfer Download u key) file noRetry noObserver $ const $
Annex.Content.saveState True `after`
- Annex.Content.getViaTmp key
+ Annex.Content.getViaTmp (Annex.Content.RemoteVerify r) key
(\dest -> mkCopier hardlink params object dest >>= \a -> a p <&&> liftIO checksuccessio)
)
diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs
index 419a5e4c1..9dde2b91d 100644
--- a/Types/GitConfig.hs
+++ b/Types/GitConfig.hs
@@ -61,6 +61,7 @@ data GitConfig = GitConfig
, annexHardLink :: Bool
, annexDifferences :: Differences
, annexUsedRefSpec :: Maybe RefSpec
+ , annexVerify :: Bool
, coreSymlinks :: Bool
, coreSharedRepository :: SharedRepository
, gcryptId :: Maybe String
@@ -103,6 +104,7 @@ extractGitConfig r = GitConfig
, annexDifferences = getDifferences r
, annexUsedRefSpec = either (const Nothing) Just . parseRefSpec
=<< getmaybe (annex "used-refspec")
+ , annexVerify = getbool (annex "verify") True
, coreSymlinks = getbool "core.symlinks" True
, coreSharedRepository = getSharedRepository r
, gcryptId = getmaybe "core.gcrypt-id"
@@ -129,6 +131,7 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexIgnore :: Bool
, remoteAnnexSync :: Bool
, remoteAnnexReadOnly :: Bool
+ , remoteAnnexVerify :: Bool
, remoteAnnexTrustLevel :: Maybe String
, remoteAnnexStartCommand :: Maybe String
, remoteAnnexStopCommand :: Maybe String
@@ -164,6 +167,7 @@ extractRemoteGitConfig r remotename = RemoteGitConfig
, remoteAnnexIgnore = getbool "ignore" False
, remoteAnnexSync = getbool "sync" True
, remoteAnnexReadOnly = getbool "readonly" False
+ , remoteAnnexVerify = getbool "verify" True
, remoteAnnexTrustLevel = notempty $ getmaybe "trustlevel"
, remoteAnnexStartCommand = notempty $ getmaybe "start-command"
, remoteAnnexStopCommand = notempty $ getmaybe "stop-command"
diff --git a/debian/changelog b/debian/changelog
index 7dfeabf50..548ba0593 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,17 @@
+git-annex (5.20150931) UNRELEASED; urgency=medium
+
+ * Do verification of checksums of annex objects downloaded from remotes.
+ * When annex objects are received into git repositories from other git
+ repos, their checksums are verified then too.
+ * To get the old, faster, behavior of not verifying checksums, set
+ annex.verify=false, or remote.<name>.annex-verify=false.
+ * setkey, rekey: These commands also now verify that the provided file
+ matches the key, unless annex.verify=false.
+ * reinject: Already verified content; this can now be disabled by
+ setting annex.verify=false.
+
+ -- Joey Hess <id@joeyh.name> Thu, 01 Oct 2015 12:42:56 -0400
+
git-annex (5.20150930) unstable; urgency=medium
* Added new linux standalone "ancient" build to support kernels
diff --git a/doc/git-annex-reinject.mdwn b/doc/git-annex-reinject.mdwn
index f5019e4b5..fb175015b 100644
--- a/doc/git-annex-reinject.mdwn
+++ b/doc/git-annex-reinject.mdwn
@@ -14,8 +14,8 @@ which should be an already annexed file whose content is not present.
This can be useful if you have obtained the content of a file from
elsewhere and want to put it in the local annex.
-Automatically runs fsck on dest to check that the expected content was
-provided.
+Verifies that the src file's content matches the content that the dest
+file is expected to have, and refuses to reinject it otherwise.
Example:
diff --git a/doc/git-annex-setkey.mdwn b/doc/git-annex-setkey.mdwn
index 439984c2b..25e16a14d 100644
--- a/doc/git-annex-setkey.mdwn
+++ b/doc/git-annex-setkey.mdwn
@@ -11,8 +11,7 @@ git annex setkey key file
This plumbing-level command makes the content of the specified key
be set to the specified file. The file is moved into the annex.
-No checking is done that the file contains the expected contents of the key.
-So it's generally a better idea to use [[git-annex-reinject]](1) instead of
+It's generally a better idea to use [[git-annex-reinject]](1) instead of
this command.
# SEE ALSO
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 794950d76..d35702804 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -1026,6 +1026,12 @@ Here are all the supported configuration settings.
This both prevents git-annex sync from pushing changes, and prevents
storing or removing files from read-only remote.
+* `remote.<name>.annex-verify`, `annex.verify`
+
+ By default, git-annex will verify the checksums of objects downloaded
+ from remotes. If you trust a remote and don't want the overhead
+ of these checksums, you can set this to `false`.
+
* `remote.<name>.annexUrl`
Can be used to specify a different url than the regular `remote.<name>.url`
diff --git a/doc/todo/checksum_verification_on_transfer/comment_3_2fa9445619032a378264de8b59958c60._comment b/doc/todo/checksum_verification_on_transfer/comment_3_2fa9445619032a378264de8b59958c60._comment
new file mode 100644
index 000000000..b18e7dcb5
--- /dev/null
+++ b/doc/todo/checksum_verification_on_transfer/comment_3_2fa9445619032a378264de8b59958c60._comment
@@ -0,0 +1,17 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""status update"""
+ date="2015-10-01T19:17:38Z"
+ content="""
+Checksum verification is now done for all downloads, unless disabled via
+annex.verify=false.
+
+When an object is uploaded to a regular git remote, checksum verification
+is also done. (For a local directory, git-annex runs a download from the
+perspective of the remote, so we get it for free, and when git-annex-shell
+recvkey is used, it checksums the data it receives and compares it with the
+key.)
+
+For uploads to special remotes, no checksum verification is done yet.
+Leaving this todo item open because of that gap in the coverage.
+"""]]