diff options
author | Joey Hess <joeyh@joeyh.name> | 2017-09-04 16:39:56 -0400 |
---|---|---|
committer | Joey Hess <joeyh@joeyh.name> | 2017-09-04 16:39:56 -0400 |
commit | 0f9282d22dc773bd57f3482b79dd976316ec0467 (patch) | |
tree | f0920aab9f59dda9674a995b33936e1379bab515 | |
parent | 25ed1e54abcc25f729fed016ec77a8cd049142fa (diff) |
git annex get from exports
Straightforward enough, except for the belt-and-suspenders sanity checks
needed to avoid foot-shooting, since exports are not key/value stores.
* Even when annex.verify=false, always verify from exports.
* Only get files from exports that use a backend that supports
checksum verification.
* Never trust exports, even if the user says to, because then
`git annex drop` would drop content if the export seemed to contain
a copy.
This commit was supported by the NSF-funded DataLad project.
-rw-r--r-- | Annex/Content.hs | 8 | ||||
-rw-r--r-- | Logs/Trust.hs | 12 | ||||
-rw-r--r-- | Remote/Directory.hs | 2 | ||||
-rw-r--r-- | Remote/Helper/Export.hs | 66 | ||||
-rw-r--r-- | Types/Remote.hs | 2 | ||||
-rw-r--r-- | Types/TrustLevel.hs | 2 | ||||
-rw-r--r-- | doc/git-annex-export.mdwn | 15 | ||||
-rw-r--r-- | doc/todo/export.mdwn | 4 |
8 files changed, 90 insertions, 21 deletions
diff --git a/Annex/Content.hs b/Annex/Content.hs index 0001e8ac9..b74b39753 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -354,8 +354,12 @@ shouldVerify :: VerifyConfig -> Annex Bool shouldVerify AlwaysVerify = return True shouldVerify NoVerify = return False shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig -shouldVerify (RemoteVerify r) = shouldVerify DefaultVerify - <&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r)) +shouldVerify (RemoteVerify r) = + (shouldVerify DefaultVerify + <&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r))) + -- Export remotes are not key/value stores, so always verify + -- content from them even when verification is disabled. + <||> Types.Remote.exportSupported (Types.Remote.exportActions r) {- Checks if there is enough free disk space to download a key - to its temp file. diff --git a/Logs/Trust.hs b/Logs/Trust.hs index 4f685be91..85b62ed74 100644 --- a/Logs/Trust.hs +++ b/Logs/Trust.hs @@ -65,10 +65,16 @@ trustMap = maybe trustMapLoad return =<< Annex.getState Annex.trustmap trustMapLoad :: Annex TrustMap trustMapLoad = do overrides <- Annex.getState Annex.forcetrust + l <- remoteList + -- Exports are never trusted, since they are not key/value stores. + exports <- filterM (Types.Remote.exportSupported . Types.Remote.exportActions) l + let exportoverrides = M.fromList $ + map (\r -> (Types.Remote.uuid r, UnTrusted)) exports logged <- trustMapRaw - configured <- M.fromList . 
catMaybes - <$> (map configuredtrust <$> remoteList) - let m = M.union overrides $ M.union configured logged + let configured = M.fromList $ mapMaybe configuredtrust l + let m = M.union exportoverrides $ + M.union overrides $ + M.union configured logged Annex.changeState $ \s -> s { Annex.trustmap = Just m } return m where diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 6adf6477a..7769eddd2 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -44,7 +44,7 @@ gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remot gen r u c gc = do cst <- remoteCost gc cheapRemoteCost let chunkconfig = getChunkConfig c - return $ Just $ exportableRemote $ specialRemote c + exportableRemote $ specialRemote c (prepareStore dir chunkconfig) (retrieve dir chunkconfig) (simplyPrepare $ remove dir) diff --git a/Remote/Helper/Export.hs b/Remote/Helper/Export.hs index 9bbbb1f59..73ebb9141 100644 --- a/Remote/Helper/Export.hs +++ b/Remote/Helper/Export.hs @@ -8,9 +8,15 @@ module Remote.Helper.Export where import Annex.Common +import qualified Annex import Types.Remote import Types.Creds +import Types.Backend +import Types.Key +import Types.TrustLevel +import Backend import Remote.Helper.Encryptable (isEncrypted) +import Database.Export import qualified Data.Map as M @@ -27,15 +33,59 @@ exportUnsupported = ExportActions -- | A remote that supports exports when configured with exporttree=yes, -- and otherwise does not. 
-exportableRemote :: Remote -> Remote +exportableRemote :: Remote -> Annex (Maybe Remote) exportableRemote r = case M.lookup "exporttree" (config r) of - Just "yes" -> r - { storeKey = \_ _ _ -> do - warning "remote is configured with exporttree=yes; use `git-annex export` to store content on it" - return False - } - _ -> r - { exportActions = exportUnsupported } + Just "yes" -> do + db <- openDb (uuid r) + + return $ Just $ r + -- Storing a key on an export would need a way to + -- look up the file(s) that the currently exported + -- tree uses for a key; there's not currently an + -- inexpensive way to do that (getExportLocation + -- only finds files that have been stored on the + -- export already). + { storeKey = \_ _ _ -> do + warning "remote is configured with exporttree=yes; use `git-annex export` to store content on it" + return False + -- Keys can be retrieved, but since an export + -- is not a true key/value store, the content of + -- the key has to be able to be strongly verified. + , retrieveKeyFile = \k _af dest p -> + if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (keyVariety k)) + then do + locs <- liftIO $ getExportLocation db k + case locs of + [] -> do + warning "unknown export location" + return (False, UnVerified) + (l:_) -> retrieveExport (exportActions r) k l dest p + else do + warning $ "exported content cannot be verified due to using the " ++ formatKeyVariety (keyVariety k) ++ " backend" + return (False, UnVerified) + , retrieveKeyFileCheap = \_ _ _ -> return False + -- Remove all files a key was exported to. + , removeKey = \k -> do + locs <- liftIO $ getExportLocation db k + oks <- forM locs $ \loc -> do + ok <- removeExport (exportActions r) k loc + when ok $ + liftIO $ removeExportLocation db k loc + return ok + liftIO $ flushDbQueue db + return (and oks) + -- Can't lock content on exports, since they're + -- not key/value stores, and someone else could + -- change what's exported to a file at any time. 
+ , lockContent = Nothing + -- Check if any of the files a key was exported + -- to are present. This doesn't guarantee the + -- export contains the right content. + , checkPresent = \k -> + anyM (checkPresentExport (exportActions r) k) + =<< liftIO (getExportLocation db k) + } + _ -> return $ Just $ r { exportActions = exportUnsupported } exportableRemoteSetup :: (SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)) -> SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) exportableRemoteSetup setupaction st mu cp c gc = case st of diff --git a/Types/Remote.hs b/Types/Remote.hs index 81f1dbe23..46750ee8d 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -159,7 +159,7 @@ unVerified a = do -- The FilePath will be relative, and may contain unix-style path -- separators. newtype ExportLocation = ExportLocation FilePath - deriving (Eq) + deriving (Show, Eq) data ExportActions a = ExportActions { exportSupported :: a Bool diff --git a/Types/TrustLevel.hs b/Types/TrustLevel.hs index 1cc4c662e..6ec18e512 100644 --- a/Types/TrustLevel.hs +++ b/Types/TrustLevel.hs @@ -21,7 +21,7 @@ import Types.UUID -- This order may seem backwards, but we generally want to list dead -- remotes last and trusted ones first. data TrustLevel = Trusted | SemiTrusted | UnTrusted | DeadTrusted - deriving (Eq, Enum, Ord, Bounded) + deriving (Eq, Enum, Ord, Bounded, Show) instance Default TrustLevel where def = SemiTrusted diff --git a/doc/git-annex-export.mdwn b/doc/git-annex-export.mdwn index abe00f09b..c8d8eac9a 100644 --- a/doc/git-annex-export.mdwn +++ b/doc/git-annex-export.mdwn @@ -11,11 +11,11 @@ git annex export `treeish --to remote` Use this command to export a tree of files from a git-annex repository. Normally files are stored on a git-annex special remote named by their -keys. That is great for data storage, but your filenames are obscured. 
-Exporting replicates the tree to the special remote as-is. +keys. That is great for reliable data storage, but your filenames are +obscured. Exporting replicates the tree to the special remote as-is. -Mixing key/value and exports in the same remote would be a mess and so is -not allowed. So, you have to configure a remote with `exporttree=yes` +Mixing key/value storage and exports in the same remote would be a mess and +so is not allowed. You have to configure a remote with `exporttree=yes` when initially setting it up with [[git-annex-initremote]](1). Repeated exports are done efficiently, by diffing the old and new tree, @@ -24,6 +24,13 @@ and transferring only the changed files. Exports can be interrupted and resumed. However, partially uploaded files will be re-started from the beginning. +Once content has been exported to a remote, commands like `git annex get` +can download content from there the same as from other remotes. However, +since an export is not a key/value store, git-annex has to do more +verification of content downloaded from an export. Some types of keys, +that are not based on checksums, cannot be downloaded from an export. +And, git-annex will never trust an export to retain the content of a key. + # SEE ALSO [[git-annex]](1) diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn index 99877423b..5813cd869 100644 --- a/doc/todo/export.mdwn +++ b/doc/todo/export.mdwn @@ -17,7 +17,9 @@ there need to be a new interface in supported remotes? Work is in progress. Todo list: -* Use retrieveExport when getting from export remotes. +* `git annex get --from export` works in the repo that exported to it, + but in another repo, the export db won't be populated, so it won't work. + Maybe just show a useful error message in this case? * Efficient handling of renames. * Support export to aditional special remotes (S3 etc) * Support export to external special remotes. |