diff options
-rw-r--r-- | Annex/Content.hs | 63 | ||||
-rw-r--r-- | Command/Get.hs | 20 | ||||
-rw-r--r-- | Command/Move.hs | 3 | ||||
-rw-r--r-- | Command/ReKey.hs | 2 | ||||
-rw-r--r-- | Command/RecvKey.hs | 52 | ||||
-rw-r--r-- | Command/Reinject.hs | 28 | ||||
-rw-r--r-- | Command/SetKey.hs | 2 | ||||
-rw-r--r-- | Command/Sync.hs | 4 | ||||
-rw-r--r-- | Command/TestRemote.hs | 6 | ||||
-rw-r--r-- | Command/TransferKey.hs | 3 | ||||
-rw-r--r-- | Command/TransferKeys.hs | 3 | ||||
-rw-r--r-- | Remote/Git.hs | 2 | ||||
-rw-r--r-- | Types/GitConfig.hs | 4 | ||||
-rw-r--r-- | debian/changelog | 14 | ||||
-rw-r--r-- | doc/git-annex-reinject.mdwn | 4 | ||||
-rw-r--r-- | doc/git-annex-setkey.mdwn | 3 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 6 | ||||
-rw-r--r-- | doc/todo/checksum_verification_on_transfer/comment_3_2fa9445619032a378264de8b59958c60._comment | 17 |
18 files changed, 137 insertions, 99 deletions
diff --git a/Annex/Content.hs b/Annex/Content.hs index 32c164417..679b7e6b7 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -16,6 +16,7 @@ module Annex.Content ( getViaTmp, getViaTmp', checkDiskSpaceToGet, + Verify(..), prepTmp, withTmp, checkDiskSpace, @@ -61,6 +62,9 @@ import Annex.Content.Direct import Annex.ReplaceFile import Utility.LockPool import Messages.Progress +import qualified Types.Remote +import qualified Types.Backend +import qualified Backend {- Checks if a given key's content is currently present. -} inAnnex :: Key -> Annex Bool @@ -214,25 +218,64 @@ lockContent key a = do {- Runs an action, passing it the temp file to get, - and if the action succeeds, verifies the file matches - the key and moves the file into the annex as a key's content. -} -getViaTmp :: Key -> (FilePath -> Annex Bool) -> Annex Bool -getViaTmp key action = checkDiskSpaceToGet key False $ getViaTmp' key action +getViaTmp :: Verify -> Key -> (FilePath -> Annex Bool) -> Annex Bool +getViaTmp v key action = checkDiskSpaceToGet key False $ + getViaTmp' v key action {- Like getViaTmp, but does not check that there is enough disk space - for the incoming key. For use when the key content is already on disk - and not being copied into place. 
-} -getViaTmp' :: Key -> (FilePath -> Annex Bool) -> Annex Bool -getViaTmp' key action = do +getViaTmp' :: Verify -> Key -> (FilePath -> Annex Bool) -> Annex Bool +getViaTmp' v key action = do tmpfile <- prepTmp key ifM (action tmpfile) - ( do - moveAnnex key tmpfile - logStatus key InfoPresent - return True - -- the tmp file is left behind, in case caller wants - -- to resume its transfer + ( ifM (verifyKeyContent v key tmpfile) + ( do + moveAnnex key tmpfile + logStatus key InfoPresent + return True + , do + warning "verification of content failed" + liftIO $ nukeFile tmpfile + return False + ) + -- On transfer failure, the tmp file is left behind, in case + -- caller wants to resume its transfer , return False ) +{- Verifies that a file is the expected content of a key. + - + - Most keys have a known size, and if so, the file size is checked. + - This is not expensive, so is always done. + - + - When the key's backend allows verifying the content (eg via checksum), + - it is checked. This is an expensive check, so configuration can prevent + - it, for either a particular remote or always. 
+ -} +verifyKeyContent :: Verify -> Key -> FilePath -> Annex Bool +verifyKeyContent v k f = verifysize <&&> verifycontent + where + verifysize = case Types.Key.keySize k of + Nothing -> return True + Just size -> do + size' <- liftIO $ catchDefaultIO 0 $ getFileSize f + return (size' == size) + verifycontent = ifM (shouldVerify v) + ( case Types.Backend.verifyKeyContent =<< Backend.maybeLookupBackendName (Types.Key.keyBackendName k) of + Nothing -> return True + Just verifier -> verifier k f + , return True + ) + +data Verify = AlwaysVerify | RemoteVerify Remote | DefaultVerify + +shouldVerify :: Verify -> Annex Bool +shouldVerify AlwaysVerify = return True +shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig +shouldVerify (RemoteVerify r) = shouldVerify DefaultVerify + <&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r)) + {- Checks if there is enough free disk space to download a key - to its temp file. - diff --git a/Command/Get.hs b/Command/Get.hs index 324ff2752..58fbefed2 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -68,17 +68,16 @@ start' expensivecheck from key afile = stopUnless (not <$> inAnnex key) $ next a perform :: Key -> AssociatedFile -> CommandPerform -perform key afile = stopUnless (getViaTmp key $ getKeyFile key afile) $ +perform key afile = stopUnless (getKey key afile) $ next $ return True -- no cleanup needed {- Try to find a copy of the file in one of the remotes, - and copy it to here. 
-} -getKeyFile :: Key -> AssociatedFile -> FilePath -> Annex Bool -getKeyFile key afile dest = getKeyFile' key afile dest - =<< Remote.keyPossibilities key +getKey :: Key -> AssociatedFile -> Annex Bool +getKey key afile = getKey' key afile =<< Remote.keyPossibilities key -getKeyFile' :: Key -> AssociatedFile -> FilePath -> [Remote] -> Annex Bool -getKeyFile' key afile dest = dispatch +getKey' :: Key -> AssociatedFile -> [Remote] -> Annex Bool +getKey' key afile = dispatch where dispatch [] = do showNote "not available" @@ -102,6 +101,9 @@ getKeyFile' key afile dest = dispatch | Remote.hasKeyCheap r = either (const False) id <$> Remote.hasKey r key | otherwise = return True - docopy r = download (Remote.uuid r) key afile noRetry noObserver $ \p -> do - showAction $ "from " ++ Remote.name r - Remote.retrieveKeyFile r key afile dest p + docopy r witness = getViaTmp (RemoteVerify r) key $ \dest -> + download (Remote.uuid r) key afile noRetry noObserver + (\p -> do + showAction $ "from " ++ Remote.name r + Remote.retrieveKeyFile r key afile dest p + ) witness diff --git a/Command/Move.hs b/Command/Move.hs index d95bce6ab..a83ea04dd 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -166,7 +166,8 @@ fromPerform src move key afile = ifM (inAnnex key) go = notifyTransfer Download afile $ download (Remote.uuid src) key afile noRetry noObserver $ \p -> do showAction $ "from " ++ Remote.name src - getViaTmp key $ \t -> Remote.retrieveKeyFile src key afile t p + getViaTmp (RemoteVerify src) key $ \t -> + Remote.retrieveKeyFile src key afile t p dispatch _ False = stop -- failed dispatch False True = next $ return True -- copy complete dispatch True True = do -- finish moving diff --git a/Command/ReKey.hs b/Command/ReKey.hs index e38ce3c50..9084814fa 100644 --- a/Command/ReKey.hs +++ b/Command/ReKey.hs @@ -52,7 +52,7 @@ perform file oldkey newkey = do {- Make a hard link to the old key content (when supported), - to avoid wasting disk space. 
-} linkKey :: Key -> Key -> Annex Bool -linkKey oldkey newkey = getViaTmp' newkey $ \tmp -> do +linkKey oldkey newkey = getViaTmp' DefaultVerify newkey $ \tmp -> do src <- calcRepo $ gitAnnexLocation oldkey liftIO $ ifM (doesFileExist tmp) ( return True diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs index 7477bb879..3a8747534 100644 --- a/Command/RecvKey.hs +++ b/Command/RecvKey.hs @@ -16,9 +16,6 @@ import Utility.Rsync import Logs.Transfer import Command.SendKey (fieldTransfer) import qualified CmdLine.GitAnnexShell.Fields as Fields -import qualified Types.Key -import qualified Types.Backend -import qualified Backend cmd :: Command cmd = noCommit $ command "recvkey" SectionPlumbing @@ -29,8 +26,12 @@ seek :: CmdParams -> CommandSeek seek = withKeys start start :: Key -> CommandStart -start key = fieldTransfer Download key $ \_p -> - ifM (getViaTmp key go) +start key = fieldTransfer Download key $ \_p -> do + -- Always verify content when a direct mode repo is sending a file, + -- as the file could change while being transferred. + fromdirect <- isJust <$> Fields.getField Fields.direct + let verify = if fromdirect then AlwaysVerify else DefaultVerify + ifM (getViaTmp verify key go) ( do -- forcibly quit after receiving one key, -- and shutdown cleanly @@ -42,43 +43,4 @@ start key = fieldTransfer Download key $ \_p -> go tmp = do opts <- filterRsyncSafeOptions . maybe [] words <$> getField "RsyncOptions" - ok <- liftIO $ rsyncServerReceive (map Param opts) tmp - - -- The file could have been received with permissions that - -- do not allow reading it, so this is done before the - -- directcheck. - freezeContent tmp - - if ok - then ifM (isJust <$> Fields.getField Fields.direct) - ( directcheck tmp - , return True - ) - else return False - {- If the sending repository uses direct mode, the file - - it sends could be modified as it's sending it. So check - - that the right size file was received, and that the key/value - - Backend is happy with it. 
-} - directcheck tmp = do - oksize <- case Types.Key.keySize key of - Nothing -> return True - Just size -> do - size' <- liftIO $ getFileSize tmp - return $ size == size' - if oksize - then case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of - Nothing -> do - warning "recvkey: received key from direct mode repository using unknown backend; cannot check; discarding" - return False - Just backend -> maybe (return True) runverify - (Types.Backend.verifyKeyContent backend) - else do - warning "recvkey: received key with wrong size; discarding" - return False - where - runverify check = ifM (check key tmp) - ( return True - , do - warning "recvkey: received key from direct mode repository seems to have changed as it was transferred; discarding" - return False - ) + liftIO $ rsyncServerReceive (map Param opts) tmp diff --git a/Command/Reinject.hs b/Command/Reinject.hs index 76e1420ff..90ddc1c2a 100644 --- a/Command/Reinject.hs +++ b/Command/Reinject.hs @@ -11,8 +11,6 @@ import Common.Annex import Command import Logs.Location import Annex.Content -import qualified Command.Fsck -import qualified Backend cmd :: Command cmd = command "reinject" SectionUtility @@ -36,29 +34,19 @@ start (src:dest:[]) start _ = error "specify a src file and a dest file" perform :: FilePath -> FilePath -> Key -> CommandPerform -perform src dest key = do - {- Check the content before accepting it. -} - v <- Backend.getBackend dest key - case v of - Nothing -> stop - Just backend -> - ifM (Command.Fsck.checkKeySizeOr reject key src - <&&> Command.Fsck.checkBackendOr reject backend key src) - ( do - unlessM move $ error "mv failed!" - next $ cleanup key - , error "not reinjecting" - ) +perform src _dest key = ifM move + ( next $ cleanup key + , error "failed" + ) where - -- the file might be on a different filesystem, + -- The file might be on a different filesystem, -- so moveFile is used rather than simply calling - -- moveToObjectDir; disk space is also - -- checked this way. 
- move = getViaTmp key $ \tmp -> + -- moveToObjectDir; disk space is also checked this way, + -- and the file's content is verified to match the key. + move = getViaTmp DefaultVerify key $ \tmp -> liftIO $ catchBoolIO $ do moveFile src tmp return True - reject = const $ return "wrong file?" cleanup :: Key -> CommandCleanup cleanup key = do diff --git a/Command/SetKey.hs b/Command/SetKey.hs index d8216a0b4..319229482 100644 --- a/Command/SetKey.hs +++ b/Command/SetKey.hs @@ -35,7 +35,7 @@ perform file key = do -- the file might be on a different filesystem, so moveFile is used -- rather than simply calling moveAnnex; disk space is also -- checked this way. - ok <- getViaTmp key $ \dest -> + ok <- getViaTmp DefaultVerify key $ \dest -> if dest /= file then liftIO $ catchBoolIO $ do moveFile file dest diff --git a/Command/Sync.hs b/Command/Sync.hs index 19a984300..964b45dc2 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -37,7 +37,7 @@ import qualified Remote.Git import Config import Annex.Wanted import Annex.Content -import Command.Get (getKeyFile') +import Command.Get (getKey') import qualified Command.Move import Logs.Location import Annex.Drop @@ -476,7 +476,7 @@ syncFile ebloom rs af k = do ) get have = includeCommandAction $ do showStart' "get" k af - next $ next $ getViaTmp k $ \dest -> getKeyFile' k af dest have + next $ next $ getKey' k af have wantput r | Remote.readonly r || remoteAnnexReadOnly (Remote.gitconfig r) = return False diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index fbe83f2db..e4a9eb829 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -159,7 +159,7 @@ test st r k = Just b -> case verifyKeyContent b of Nothing -> return True Just verifier -> verifier k (key2file k) - get = getViaTmp k $ \dest -> + get = getViaTmp (RemoteVerify r) k $ \dest -> Remote.retrieveKeyFile r k Nothing dest nullMeterUpdate store = Remote.storeKey r k Nothing nullMeterUpdate remove = Remote.removeKey r k @@ -173,10 +173,10 @@ 
testUnavailable st r k = , check (`notElem` [Right True, Right False]) "checkPresent" $ Remote.checkPresent r k , check (== Right False) "retrieveKeyFile" $ - getViaTmp k $ \dest -> + getViaTmp (RemoteVerify r) k $ \dest -> Remote.retrieveKeyFile r k Nothing dest nullMeterUpdate , check (== Right False) "retrieveKeyFileCheap" $ - getViaTmp k $ \dest -> + getViaTmp (RemoteVerify r) k $ \dest -> Remote.retrieveKeyFileCheap r k Nothing dest ] where diff --git a/Command/TransferKey.hs b/Command/TransferKey.hs index 04dbc1799..56c9ec675 100644 --- a/Command/TransferKey.hs +++ b/Command/TransferKey.hs @@ -61,7 +61,8 @@ toPerform key file remote = go Upload file $ fromPerform :: Key -> AssociatedFile -> Remote -> CommandPerform fromPerform key file remote = go Upload file $ download (uuid remote) key file forwardRetry noObserver $ \p -> - getViaTmp key $ \t -> Remote.retrieveKeyFile remote key file t p + getViaTmp (RemoteVerify remote) key $ + \t -> Remote.retrieveKeyFile remote key file t p go :: Direction -> AssociatedFile -> (NotifyWitness -> Annex Bool) -> CommandPerform go direction file a = notifyTransfer direction file a >>= liftIO . 
exitBool diff --git a/Command/TransferKeys.hs b/Command/TransferKeys.hs index 67f201024..4fb0d9069 100644 --- a/Command/TransferKeys.hs +++ b/Command/TransferKeys.hs @@ -43,7 +43,8 @@ start = do return ok | otherwise = notifyTransfer direction file $ download (Remote.uuid remote) key file forwardRetry observer $ \p -> - getViaTmp key $ \t -> Remote.retrieveKeyFile remote key file t p + getViaTmp (RemoteVerify remote) key $ \t -> + Remote.retrieveKeyFile remote key file t p observer False t tinfo = recordFailedTransfer t tinfo observer True _ _ = noop diff --git a/Remote/Git.hs b/Remote/Git.hs index 1b5b2ab42..ada2055f2 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -502,7 +502,7 @@ copyToRemote' r key file p ensureInitialized runTransfer (Transfer Download u key) file noRetry noObserver $ const $ Annex.Content.saveState True `after` - Annex.Content.getViaTmp key + Annex.Content.getViaTmp (Annex.Content.RemoteVerify r) key (\dest -> mkCopier hardlink params object dest >>= \a -> a p <&&> liftIO checksuccessio) ) diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index 419a5e4c1..9dde2b91d 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -61,6 +61,7 @@ data GitConfig = GitConfig , annexHardLink :: Bool , annexDifferences :: Differences , annexUsedRefSpec :: Maybe RefSpec + , annexVerify :: Bool , coreSymlinks :: Bool , coreSharedRepository :: SharedRepository , gcryptId :: Maybe String @@ -103,6 +104,7 @@ extractGitConfig r = GitConfig , annexDifferences = getDifferences r , annexUsedRefSpec = either (const Nothing) Just . 
parseRefSpec =<< getmaybe (annex "used-refspec") + , annexVerify = getbool (annex "verify") True , coreSymlinks = getbool "core.symlinks" True , coreSharedRepository = getSharedRepository r , gcryptId = getmaybe "core.gcrypt-id" @@ -129,6 +131,7 @@ data RemoteGitConfig = RemoteGitConfig , remoteAnnexIgnore :: Bool , remoteAnnexSync :: Bool , remoteAnnexReadOnly :: Bool + , remoteAnnexVerify :: Bool , remoteAnnexTrustLevel :: Maybe String , remoteAnnexStartCommand :: Maybe String , remoteAnnexStopCommand :: Maybe String @@ -164,6 +167,7 @@ extractRemoteGitConfig r remotename = RemoteGitConfig , remoteAnnexIgnore = getbool "ignore" False , remoteAnnexSync = getbool "sync" True , remoteAnnexReadOnly = getbool "readonly" False + , remoteAnnexVerify = getbool "verify" True , remoteAnnexTrustLevel = notempty $ getmaybe "trustlevel" , remoteAnnexStartCommand = notempty $ getmaybe "start-command" , remoteAnnexStopCommand = notempty $ getmaybe "stop-command" diff --git a/debian/changelog b/debian/changelog index 7dfeabf50..548ba0593 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,17 @@ +git-annex (5.20150931) UNRELEASED; urgency=medium + + * Do verification of checksums of annex objects downloaded from remotes. + * When annex objects are received into git repositories from other git + repos, their checksums are verified then too. + * To get the old, faster, behavior of not verifying checksums, set + annex.verify=false, or remote.<name>.annex-verify=false. + * setkey, rekey: These commands also now verify that the provided file + matches the key, unless annex.verify=false. + * reinject: Already verified content; this can now be disabled by + setting annex.verify=false. 
+ + -- Joey Hess <id@joeyh.name> Thu, 01 Oct 2015 12:42:56 -0400 + git-annex (5.20150930) unstable; urgency=medium * Added new linux standalone "ancient" build to support kernels diff --git a/doc/git-annex-reinject.mdwn b/doc/git-annex-reinject.mdwn index f5019e4b5..fb175015b 100644 --- a/doc/git-annex-reinject.mdwn +++ b/doc/git-annex-reinject.mdwn @@ -14,8 +14,8 @@ which should be an already annexed file whose content is not present. This can be useful if you have obtained the content of a file from elsewhere and want to put it in the local annex. -Automatically runs fsck on dest to check that the expected content was -provided. +Verifies that the src file's content matches with the content that the dest +file is expected to have, and refuses to reinject it otherwise. Example: diff --git a/doc/git-annex-setkey.mdwn b/doc/git-annex-setkey.mdwn index 439984c2b..25e16a14d 100644 --- a/doc/git-annex-setkey.mdwn +++ b/doc/git-annex-setkey.mdwn @@ -11,8 +11,7 @@ git annex setkey key file This plumbing-level command makes the content of the specified key be set to the specified file. The file is moved into the annex. -No checking is done that the file contains the expected contents of the key. -So it's generally a better idea to use [[git-annex-reinject]](1) instead of +It's generally a better idea to use [[git-annex-reinject]](1) instead of this command. # SEE ALSO diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 794950d76..d35702804 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1026,6 +1026,12 @@ Here are all the supported configuration settings. This both prevents git-annex sync from pushing changes, and prevents storing or removing files from read-only remote. +* `remote.<name>.annex-verify`, `annex.verify` + + By default, git-annex will verify the checksums of objects downloaded + from remotes. If you trust a remote and don't want the overhead + of these checksums, you can set this to `false`. 
+ * `remote.<name>.annexUrl` Can be used to specify a different url than the regular `remote.<name>.url` diff --git a/doc/todo/checksum_verification_on_transfer/comment_3_2fa9445619032a378264de8b59958c60._comment b/doc/todo/checksum_verification_on_transfer/comment_3_2fa9445619032a378264de8b59958c60._comment new file mode 100644 index 000000000..b18e7dcb5 --- /dev/null +++ b/doc/todo/checksum_verification_on_transfer/comment_3_2fa9445619032a378264de8b59958c60._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="joey" + subject="""status update""" + date="2015-10-01T19:17:38Z" + content=""" +Checksum verification is now done for all downloads, unless disabled via +annex.verify=false. + +When an object is uploaded to a regular git remote, checksum verification +is also done. (For a local directory, git-annex runs a download from the +perspective of the remote, so we get it for free, and when git-annex-shell +recvkey is used, it checksums the data it receives and compares it with the +key.) + +For uploads to special remotes, no checksum verification is done yet. +Leaving this todo item open because of that gap in the coverage. +"""]] |