summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2017-09-07 15:53:34 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2017-09-07 15:53:34 -0400
commita39d0a51b48a02afd7a9cd50725b753d96e9e145 (patch)
tree33909c5a6920d1ac0ab7aa22452a7d15e3338f9f
parentd928368d8a698abcaa06b4ee17764f5514521ffc (diff)
parent6b62556049481d8ed9b75a1642f3422a79c55133 (diff)
Merge branch 'export'
-rw-r--r--Annex/Branch.hs1
-rw-r--r--Annex/Content.hs8
-rw-r--r--Annex/Locations.hs10
-rw-r--r--Annex/SpecialRemote.hs2
-rw-r--r--Assistant/MakeRemote.hs4
-rw-r--r--CHANGELOG5
-rw-r--r--CmdLine/GitAnnex.hs2
-rw-r--r--CmdLine/Seek.hs4
-rw-r--r--CmdLine/Usage.hs2
-rw-r--r--Command/EnableRemote.hs8
-rw-r--r--Command/Export.hs317
-rw-r--r--Command/FindRef.hs3
-rw-r--r--Database/Export.hs88
-rw-r--r--Database/Fsck.hs2
-rw-r--r--Database/Handle.hs65
-rw-r--r--Database/Keys.hs2
-rw-r--r--Database/Queue.hs12
-rw-r--r--Git/Tree.hs6
-rw-r--r--Logs.hs4
-rw-r--r--Logs/Export.hs123
-rw-r--r--Logs/Trust.hs12
-rw-r--r--Remote.hs1
-rw-r--r--Remote/BitTorrent.hs15
-rw-r--r--Remote/Bup.hs15
-rw-r--r--Remote/Ddar.hs15
-rw-r--r--Remote/Directory.hs108
-rw-r--r--Remote/External.hs15
-rw-r--r--Remote/GCrypt.hs15
-rw-r--r--Remote/Git.hs19
-rw-r--r--Remote/Glacier.hs20
-rw-r--r--Remote/Helper/Encryptable.hs12
-rw-r--r--Remote/Helper/Export.hs126
-rw-r--r--Remote/Hook.hs15
-rw-r--r--Remote/List.hs8
-rw-r--r--Remote/P2P.hs15
-rw-r--r--Remote/Rsync.hs15
-rw-r--r--Remote/S3.hs20
-rw-r--r--Remote/Tahoe.hs15
-rw-r--r--Remote/Web.hs15
-rw-r--r--Remote/WebDAV.hs15
-rw-r--r--Types/Remote.hs111
-rw-r--r--Types/TrustLevel.hs2
-rw-r--r--Utility/Tmp.hs2
-rw-r--r--doc/design/exporting_trees_to_special_remotes.mdwn169
-rw-r--r--doc/design/external_special_remote_protocol.mdwn54
-rw-r--r--doc/git-annex-export.mdwn64
-rw-r--r--doc/git-annex-import.mdwn2
-rw-r--r--doc/git-annex.mdwn6
-rw-r--r--doc/internals.mdwn19
-rw-r--r--doc/special_remotes/directory.mdwn4
-rw-r--r--doc/todo/export.mdwn12
-rw-r--r--git-annex.cabal3
52 files changed, 1357 insertions, 250 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index 5482dc44b..5214df627 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -21,6 +21,7 @@ module Annex.Branch (
maybeChange,
commit,
forceCommit,
+ getBranch,
files,
withIndex,
performTransitions,
diff --git a/Annex/Content.hs b/Annex/Content.hs
index 0001e8ac9..0b665d4dc 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -354,8 +354,12 @@ shouldVerify :: VerifyConfig -> Annex Bool
shouldVerify AlwaysVerify = return True
shouldVerify NoVerify = return False
shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig
-shouldVerify (RemoteVerify r) = shouldVerify DefaultVerify
- <&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r))
+shouldVerify (RemoteVerify r) =
+ (shouldVerify DefaultVerify
+ <&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r)))
+ -- Export remotes are not key/value stores, so always verify
+ -- content from them even when verification is disabled.
+ <||> Types.Remote.isExportSupported r
{- Checks if there is enough free disk space to download a key
- to its temp file.
diff --git a/Annex/Locations.hs b/Annex/Locations.hs
index 47768b9c1..a5de2e4ff 100644
--- a/Annex/Locations.hs
+++ b/Annex/Locations.hs
@@ -36,6 +36,7 @@ module Annex.Locations (
gitAnnexFsckDbDir,
gitAnnexFsckDbLock,
gitAnnexFsckResultsLog,
+ gitAnnexExportDbDir,
gitAnnexScheduleState,
gitAnnexTransferDir,
gitAnnexCredsDir,
@@ -290,6 +291,15 @@ gitAnnexFsckDbLock u r = gitAnnexFsckDir u r </> "fsck.lck"
gitAnnexFsckResultsLog :: UUID -> Git.Repo -> FilePath
gitAnnexFsckResultsLog u r = gitAnnexDir r </> "fsckresults" </> fromUUID u
+{- .git/annex/export/uuid/ is used to store information about
+ - exports to special remotes. -}
+gitAnnexExportDir :: UUID -> Git.Repo -> FilePath
+gitAnnexExportDir u r = gitAnnexDir r </> "export" </> fromUUID u
+
+{- Directory containing database used to record export info. -}
+gitAnnexExportDbDir :: UUID -> Git.Repo -> FilePath
+gitAnnexExportDbDir u r = gitAnnexExportDir u r </> "db"
+
{- .git/annex/schedulestate is used to store information about when
- scheduled jobs were last run. -}
gitAnnexScheduleState :: Git.Repo -> FilePath
diff --git a/Annex/SpecialRemote.hs b/Annex/SpecialRemote.hs
index f53a2ca63..c215208db 100644
--- a/Annex/SpecialRemote.hs
+++ b/Annex/SpecialRemote.hs
@@ -81,7 +81,7 @@ autoEnable = do
(Just name, Right t) -> whenM (canenable u) $ do
showSideAction $ "Auto enabling special remote " ++ name
dummycfg <- liftIO dummyRemoteGitConfig
- res <- tryNonAsync $ setup t Enable (Just u) Nothing c dummycfg
+ res <- tryNonAsync $ setup t (Enable c) (Just u) Nothing c dummycfg
case res of
Left e -> warning (show e)
Right _ -> return ()
diff --git a/Assistant/MakeRemote.hs b/Assistant/MakeRemote.hs
index 57abb86fd..b98e7f023 100644
--- a/Assistant/MakeRemote.hs
+++ b/Assistant/MakeRemote.hs
@@ -52,7 +52,7 @@ makeRsyncRemote name location = makeRemote name location $ const $ void $
go Nothing = setupSpecialRemote name Rsync.remote config Nothing
(Nothing, R.Init, Annex.SpecialRemote.newConfig name)
go (Just (u, c)) = setupSpecialRemote name Rsync.remote config Nothing
- (Just u, R.Enable, c)
+ (Just u, R.Enable c, c)
config = M.fromList
[ ("encryption", "shared")
, ("rsyncurl", location)
@@ -91,7 +91,7 @@ enableSpecialRemote name remotetype mcreds config = do
r <- Annex.SpecialRemote.findExisting name
case r of
Nothing -> error $ "Cannot find a special remote named " ++ name
- Just (u, c) -> setupSpecialRemote' False name remotetype config mcreds (Just u, R.Enable, c)
+ Just (u, c) -> setupSpecialRemote' False name remotetype config mcreds (Just u, R.Enable c, c)
setupSpecialRemote :: RemoteName -> RemoteType -> R.RemoteConfig -> Maybe CredPair -> (Maybe UUID, R.SetupStage, R.RemoteConfig) -> Annex RemoteName
setupSpecialRemote = setupSpecialRemote' True
diff --git a/CHANGELOG b/CHANGELOG
index cd2a9d025..3e168de9f 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,10 @@
git-annex (6.20170819) UNRELEASED; urgency=medium
+ * git-annex export: New command, can create and efficiently update
+ exports of trees to special remotes.
+ * Use git-annex initremote with exporttree=yes to set up a special remote
+ for use by git-annex export.
+ * Implemented export to directory special remotes.
* Support building with feed-1.0, while still supporting older versions.
* init: Display an additional message when it detects a filesystem that
allows writing to files whose write bit is not set.
diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs
index be5f56ba0..1a5a13839 100644
--- a/CmdLine/GitAnnex.hs
+++ b/CmdLine/GitAnnex.hs
@@ -95,6 +95,7 @@ import qualified Command.AddUrl
import qualified Command.ImportFeed
import qualified Command.RmUrl
import qualified Command.Import
+import qualified Command.Export
import qualified Command.Map
import qualified Command.Direct
import qualified Command.Indirect
@@ -141,6 +142,7 @@ cmds testoptparser testrunner =
, Command.ImportFeed.cmd
, Command.RmUrl.cmd
, Command.Import.cmd
+ , Command.Export.cmd
, Command.Init.cmd
, Command.Describe.cmd
, Command.InitRemote.cmd
diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs
index 66cd985f4..556a108eb 100644
--- a/CmdLine/Seek.hs
+++ b/CmdLine/Seek.hs
@@ -77,12 +77,12 @@ withFilesNotInGit skipdotfiles a params
go l = seekActions $ prepFiltered a $
return $ concat $ segmentPaths params l
-withFilesInRefs :: (FilePath -> Key -> CommandStart) -> CmdParams -> CommandSeek
+withFilesInRefs :: (FilePath -> Key -> CommandStart) -> [Git.Ref] -> CommandSeek
withFilesInRefs a = mapM_ go
where
go r = do
matcher <- Limit.getMatcher
- (l, cleanup) <- inRepo $ LsTree.lsTree (Git.Ref r)
+ (l, cleanup) <- inRepo $ LsTree.lsTree r
forM_ l $ \i -> do
let f = getTopFilePath $ LsTree.file i
v <- catKey (LsTree.sha i)
diff --git a/CmdLine/Usage.hs b/CmdLine/Usage.hs
index c4522788f..6dd2d053d 100644
--- a/CmdLine/Usage.hs
+++ b/CmdLine/Usage.hs
@@ -94,6 +94,8 @@ paramAddress :: String
paramAddress = "ADDRESS"
paramItem :: String
paramItem = "ITEM"
+paramTreeish :: String
+paramTreeish = "TREEISH"
paramKeyValue :: String
paramKeyValue = "K=V"
paramNothing :: String
diff --git a/Command/EnableRemote.hs b/Command/EnableRemote.hs
index a2a26009e..fd830375a 100644
--- a/Command/EnableRemote.hs
+++ b/Command/EnableRemote.hs
@@ -81,11 +81,11 @@ startSpecialRemote name config (Just (u, c)) = do
gc <- maybe (liftIO dummyRemoteGitConfig)
(return . Remote.gitconfig)
=<< Remote.byUUID u
- next $ performSpecialRemote t u fullconfig gc
+ next $ performSpecialRemote t u c fullconfig gc
-performSpecialRemote :: RemoteType -> UUID -> R.RemoteConfig -> RemoteGitConfig -> CommandPerform
-performSpecialRemote t u c gc = do
- (c', u') <- R.setup t R.Enable (Just u) Nothing c gc
+performSpecialRemote :: RemoteType -> UUID -> R.RemoteConfig -> R.RemoteConfig -> RemoteGitConfig -> CommandPerform
+performSpecialRemote t u oldc c gc = do
+ (c', u') <- R.setup t (R.Enable oldc) (Just u) Nothing c gc
next $ cleanupSpecialRemote u' c'
cleanupSpecialRemote :: UUID -> R.RemoteConfig -> CommandCleanup
diff --git a/Command/Export.hs b/Command/Export.hs
new file mode 100644
index 000000000..d2ba53dd2
--- /dev/null
+++ b/Command/Export.hs
@@ -0,0 +1,317 @@
+{- git-annex command
+ -
+ - Copyright 2017 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE TupleSections #-}
+
+module Command.Export where
+
+import Command
+import qualified Git
+import qualified Git.DiffTree
+import qualified Git.LsTree
+import qualified Git.Ref
+import Git.Types
+import Git.FilePath
+import Git.Sha
+import Types.Key
+import Types.Remote
+import Annex.Content
+import Annex.CatFile
+import Logs.Location
+import Logs.Export
+import Database.Export
+import Messages.Progress
+import Utility.Tmp
+
+import qualified Data.ByteString.Lazy as L
+import qualified Data.Map as M
+
+cmd :: Command
+cmd = command "export" SectionCommon
+ "export content to a remote"
+ paramTreeish (seek <$$> optParser)
+
+data ExportOptions = ExportOptions
+ { exportTreeish :: Git.Ref
+ , exportRemote :: DeferredParse Remote
+ }
+
+optParser :: CmdParamsDesc -> Parser ExportOptions
+optParser _ = ExportOptions
+ <$> (Git.Ref <$> parsetreeish)
+ <*> (parseRemoteOption <$> parseToOption)
+ where
+ parsetreeish = argument str
+ ( metavar paramTreeish
+ )
+
+-- An export includes both annexed files and files stored in git.
+-- For the latter, a SHA1 key is synthesized.
+data ExportKey = AnnexKey Key | GitKey Key
+ deriving (Show, Eq, Ord)
+
+asKey :: ExportKey -> Key
+asKey (AnnexKey k) = k
+asKey (GitKey k) = k
+
+exportKey :: Git.Sha -> Annex ExportKey
+exportKey sha = mk <$> catKey sha
+ where
+ mk (Just k) = AnnexKey k
+ mk Nothing = GitKey $ Key
+ { keyName = show sha
+ , keyVariety = SHA1Key (HasExt False)
+ , keySize = Nothing
+ , keyMtime = Nothing
+ , keyChunkSize = Nothing
+ , keyChunkNum = Nothing
+ }
+
+-- To handle renames which swap files, the exported file is first renamed
+-- to a stable temporary name based on the key.
+exportTempName :: ExportKey -> ExportLocation
+exportTempName ek = ExportLocation $
+ ".git-annex-tmp-content-" ++ key2file (asKey (ek))
+
+seek :: ExportOptions -> CommandSeek
+seek o = do
+ r <- getParsed (exportRemote o)
+ unlessM (isExportSupported r) $
+ giveup "That remote does not support exports."
+
+ new <- fromMaybe (giveup "unknown tree") <$>
+ -- Dereference the tree pointed to by the branch, commit,
+ -- or tag.
+ inRepo (Git.Ref.tree (exportTreeish o))
+ old <- getExport (uuid r)
+ recordExportBeginning (uuid r) new
+ db <- openDb (uuid r)
+
+ -- Clean up after incomplete export of a tree, in which
+ -- the next block of code below may have renamed some files to
+ -- temp files. Diff from the incomplete tree to the new tree,
+ -- and delete any temp files that the new tree can't use.
+ forM_ (concatMap incompleteExportedTreeish old) $ \incomplete ->
+ mapdiff (\diff -> startRecoverIncomplete r db (Git.DiffTree.srcsha diff) (Git.DiffTree.file diff))
+ incomplete
+ new
+
+ -- Diff the old and new trees, and delete or rename to new name all
+ -- changed files in the export. After this, every file that remains
+ -- in the export will have the content from the new treeish.
+ --
+ -- (Also, when there was an export conflict, this resolves it.)
+ case map exportedTreeish old of
+ [] -> return ()
+ [oldtreesha] -> do
+ diffmap <- mkDiffMap oldtreesha new
+ let seekdiffmap a = seekActions $ pure $ map a (M.toList diffmap)
+ -- Rename old files to temp, or delete.
+ seekdiffmap $ \(ek, (moldf, mnewf)) ->
+ case (moldf, mnewf) of
+ (Just oldf, Just _newf) ->
+ startMoveToTempName r db oldf ek
+ (Just oldf, Nothing) ->
+ startUnexport' r db oldf ek
+ _ -> stop
+ -- Rename from temp to new files.
+ seekdiffmap $ \(ek, (moldf, mnewf)) ->
+ case (moldf, mnewf) of
+ (Just _oldf, Just newf) ->
+ startMoveFromTempName r db ek newf
+ _ -> stop
+ ts -> do
+ warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.."
+ forM_ ts $ \oldtreesha -> do
+ -- Unexport both the srcsha and the dstsha,
+ -- because the wrong content may have
+ -- been renamed to the dstsha due to the
+ -- export conflict.
+ let unexportboth d =
+ [ Git.DiffTree.srcsha d
+ , Git.DiffTree.dstsha d
+ ]
+ -- Don't rename to temp, because the
+ -- content is unknown; delete instead.
+ mapdiff
+ (\diff -> startUnexport r db (Git.DiffTree.file diff) (unexportboth diff))
+ oldtreesha new
+
+ -- Waiting until now to record the export guarantees that,
+ -- if this export is interrupted, there are no files left over
+ -- from a previous export, that are not part of this export.
+ recordExport (uuid r) $ ExportChange
+ { oldTreeish = map exportedTreeish old
+ , newTreeish = new
+ }
+
+ -- Export everything that is not yet exported.
+ (l, cleanup') <- inRepo $ Git.LsTree.lsTree new
+ seekActions $ pure $ map (startExport r db) l
+ void $ liftIO cleanup'
+
+ closeDb db
+ where
+ mapdiff a oldtreesha newtreesha = do
+ (diff, cleanup) <- inRepo $
+ Git.DiffTree.diffTreeRecursive oldtreesha newtreesha
+ seekActions $ pure $ map a diff
+ void $ liftIO cleanup
+
+-- Map of old and new filenames for each changed ExportKey in a diff.
+type DiffMap = M.Map ExportKey (Maybe TopFilePath, Maybe TopFilePath)
+
+mkDiffMap :: Git.Ref -> Git.Ref -> Annex DiffMap
+mkDiffMap old new = do
+ (diff, cleanup) <- inRepo $ Git.DiffTree.diffTreeRecursive old new
+ diffmap <- M.fromListWith combinedm . concat <$> forM diff mkdm
+ void $ liftIO cleanup
+ return diffmap
+ where
+ combinedm (srca, dsta) (srcb, dstb) = (srca <|> srcb, dsta <|> dstb)
+ mkdm i = do
+ srcek <- getk (Git.DiffTree.srcsha i)
+ dstek <- getk (Git.DiffTree.dstsha i)
+ return $ catMaybes
+ [ (, (Just (Git.DiffTree.file i), Nothing)) <$> srcek
+ , (, (Nothing, Just (Git.DiffTree.file i))) <$> dstek
+ ]
+ getk sha
+ | sha == nullSha = return Nothing
+ | otherwise = Just <$> exportKey sha
+
+startExport :: Remote -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart
+startExport r db ti = do
+ ek <- exportKey (Git.LsTree.sha ti)
+ stopUnless (liftIO $ notElem loc <$> getExportLocation db (asKey ek)) $ do
+ showStart "export" f
+ next $ performExport r db ek (Git.LsTree.sha ti) loc
+ where
+ loc = ExportLocation $ toInternalGitPath f
+ f = getTopFilePath $ Git.LsTree.file ti
+
+performExport :: Remote -> ExportHandle -> ExportKey -> Sha -> ExportLocation -> CommandPerform
+performExport r db ek contentsha loc = do
+ let storer = storeExport $ exportActions r
+ sent <- case ek of
+ AnnexKey k -> ifM (inAnnex k)
+ ( metered Nothing k $ \m -> do
+ let rollback = void $ performUnexport r db [ek] loc
+ sendAnnex k rollback
+ (\f -> storer f k loc m)
+ , do
+ showNote "not available"
+ return False
+ )
+ -- Sending a non-annexed file.
+ GitKey sha1k -> metered Nothing sha1k $ \m ->
+ withTmpFile "export" $ \tmp h -> do
+ b <- catObject contentsha
+ liftIO $ L.hPut h b
+ liftIO $ hClose h
+ storer tmp sha1k loc m
+ if sent
+ then next $ cleanupExport r db ek loc
+ else stop
+
+cleanupExport :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> CommandCleanup
+cleanupExport r db ek loc = do
+ liftIO $ addExportLocation db (asKey ek) loc
+ logChange (asKey ek) (uuid r) InfoPresent
+ return True
+
+startUnexport :: Remote -> ExportHandle -> TopFilePath -> [Git.Sha] -> CommandStart
+startUnexport r db f shas = do
+ eks <- forM (filter (/= nullSha) shas) exportKey
+ if null eks
+ then stop
+ else do
+ showStart "unexport" f'
+ next $ performUnexport r db eks loc
+ where
+ loc = ExportLocation $ toInternalGitPath f'
+ f' = getTopFilePath f
+
+startUnexport' :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
+startUnexport' r db f ek = do
+ showStart "unexport" f'
+ next $ performUnexport r db [ek] loc
+ where
+ loc = ExportLocation $ toInternalGitPath f'
+ f' = getTopFilePath f
+
+performUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandPerform
+performUnexport r db eks loc = do
+ ifM (allM (\ek -> removeExport (exportActions r) (asKey ek) loc) eks)
+ ( next $ cleanupUnexport r db eks loc
+ , stop
+ )
+
+cleanupUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandCleanup
+cleanupUnexport r db eks loc = do
+ liftIO $ do
+ forM_ eks $ \ek ->
+ removeExportLocation db (asKey ek) loc
+ -- Flush so that getExportLocation sees this and any
+ -- other removals of the key.
+ flushDbQueue db
+ remaininglocs <- liftIO $
+ concat <$> forM eks (\ek -> getExportLocation db (asKey ek))
+ when (null remaininglocs) $
+ forM_ eks $ \ek ->
+ logChange (asKey ek) (uuid r) InfoMissing
+ return True
+
+startRecoverIncomplete :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart
+startRecoverIncomplete r db sha oldf
+ | sha == nullSha = stop
+ | otherwise = do
+ ek <- exportKey sha
+ let loc@(ExportLocation f) = exportTempName ek
+ showStart "unexport" f
+ liftIO $ removeExportLocation db (asKey ek) oldloc
+ next $ performUnexport r db [ek] loc
+ where
+ oldloc = ExportLocation $ toInternalGitPath oldf'
+ oldf' = getTopFilePath oldf
+
+startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
+startMoveToTempName r db f ek = do
+ let tmploc@(ExportLocation tmpf) = exportTempName ek
+ showStart "rename" (f' ++ " -> " ++ tmpf)
+ next $ performRename r db ek loc tmploc
+ where
+ loc = ExportLocation $ toInternalGitPath f'
+ f' = getTopFilePath f
+
+startMoveFromTempName :: Remote -> ExportHandle -> ExportKey -> TopFilePath -> CommandStart
+startMoveFromTempName r db ek f = do
+ let tmploc@(ExportLocation tmpf) = exportTempName ek
+ stopUnless (liftIO $ elem tmploc <$> getExportLocation db (asKey ek)) $ do
+ showStart "rename" (tmpf ++ " -> " ++ f')
+ next $ performRename r db ek tmploc loc
+ where
+ loc = ExportLocation $ toInternalGitPath f'
+ f' = getTopFilePath f
+
+performRename :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> ExportLocation -> CommandPerform
+performRename r db ek src dest = do
+ ifM (renameExport (exportActions r) (asKey ek) src dest)
+ ( next $ cleanupRename db ek src dest
+ -- In case the special remote does not support renaming,
+ -- unexport the src instead.
+ , performUnexport r db [ek] src
+ )
+
+cleanupRename :: ExportHandle -> ExportKey -> ExportLocation -> ExportLocation -> CommandCleanup
+cleanupRename db ek src dest = do
+ liftIO $ do
+ removeExportLocation db (asKey ek) src
+ addExportLocation db (asKey ek) dest
+ -- Flush so that getExportLocation sees this.
+ flushDbQueue db
+ return True
diff --git a/Command/FindRef.hs b/Command/FindRef.hs
index cb14371e0..93315bcef 100644
--- a/Command/FindRef.hs
+++ b/Command/FindRef.hs
@@ -9,6 +9,7 @@ module Command.FindRef where
import Command
import qualified Command.Find as Find
+import qualified Git
cmd :: Command
cmd = withGlobalOptions nonWorkTreeMatchingOptions $ Find.mkCommand $
@@ -17,4 +18,4 @@ cmd = withGlobalOptions nonWorkTreeMatchingOptions $ Find.mkCommand $
paramRef (seek <$$> Find.optParser)
seek :: Find.FindOptions -> CommandSeek
-seek o = Find.start o `withFilesInRefs` Find.findThese o
+seek o = Find.start o `withFilesInRefs` (map Git.Ref $ Find.findThese o)
diff --git a/Database/Export.hs b/Database/Export.hs
new file mode 100644
index 000000000..00c6ab251
--- /dev/null
+++ b/Database/Export.hs
@@ -0,0 +1,88 @@
+{- Sqlite database used for exports to special remotes.
+ -
+ - Copyright 2017 Joey Hess <id@joeyh.name>
+ -:
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
+{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
+{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE RankNTypes #-}
+
+module Database.Export (
+ ExportHandle,
+ openDb,
+ closeDb,
+ addExportLocation,
+ removeExportLocation,
+ flushDbQueue,
+ getExportLocation,
+ ExportedId,
+) where
+
+import Database.Types
+import qualified Database.Queue as H
+import Database.Init
+import Annex.Locations
+import Annex.Common hiding (delete)
+import Types.Remote (ExportLocation(..))
+
+import Database.Persist.TH
+import Database.Esqueleto hiding (Key)
+
+newtype ExportHandle = ExportHandle H.DbQueue
+
+share [mkPersist sqlSettings, mkMigrate "migrateExport"] [persistLowerCase|
+Exported
+ key IKey
+ file SFilePath
+ KeyFileIndex key file
+|]
+
+{- Opens the database, creating it if it doesn't exist yet. -}
+openDb :: UUID -> Annex ExportHandle
+openDb u = do
+ dbdir <- fromRepo (gitAnnexExportDbDir u)
+ let db = dbdir </> "db"
+ unlessM (liftIO $ doesFileExist db) $ do
+ initDb db $ void $
+ runMigrationSilent migrateExport
+ h <- liftIO $ H.openDbQueue H.SingleWriter db "exported"
+ return $ ExportHandle h
+
+closeDb :: ExportHandle -> Annex ()
+closeDb (ExportHandle h) = liftIO $ H.closeDbQueue h
+
+queueDb :: ExportHandle -> SqlPersistM () -> IO ()
+queueDb (ExportHandle h) = H.queueDb h checkcommit
+ where
+ -- commit queue after 1000 changes
+ checkcommit sz _lastcommittime
+ | sz > 1000 = return True
+ | otherwise = return False
+
+addExportLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
+addExportLocation h k (ExportLocation f) = queueDb h $
+ void $ insertUnique $ Exported (toIKey k) (toSFilePath f)
+
+removeExportLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
+removeExportLocation h k (ExportLocation f) = queueDb h $
+ delete $ from $ \r -> do
+ where_ (r ^. ExportedKey ==. val ik &&. r ^. ExportedFile ==. val ef)
+ where
+ ik = toIKey k
+ ef = toSFilePath f
+
+flushDbQueue :: ExportHandle -> IO ()
+flushDbQueue (ExportHandle h) = H.flushDbQueue h
+
+{- Note that this does not see recently queued changes. -}
+getExportLocation :: ExportHandle -> Key -> IO [ExportLocation]
+getExportLocation (ExportHandle h) k = H.queryDbQueue h $ do
+ l <- select $ from $ \r -> do
+ where_ (r ^. ExportedKey ==. val ik)
+ return (r ^. ExportedFile)
+ return $ map (ExportLocation . fromSFilePath . unValue) l
+ where
+ ik = toIKey k
diff --git a/Database/Fsck.hs b/Database/Fsck.hs
index 9affeac85..1ce513dcf 100644
--- a/Database/Fsck.hs
+++ b/Database/Fsck.hs
@@ -63,7 +63,7 @@ openDb u = do
initDb db $ void $
runMigrationSilent migrateFsck
lockFileCached =<< fromRepo (gitAnnexFsckDbLock u)
- h <- liftIO $ H.openDbQueue db "fscked"
+ h <- liftIO $ H.openDbQueue H.MultiWriter db "fscked"
return $ FsckHandle h u
closeDb :: FsckHandle -> Annex ()
diff --git a/Database/Handle.hs b/Database/Handle.hs
index 7827be749..f5a0a5dda 100644
--- a/Database/Handle.hs
+++ b/Database/Handle.hs
@@ -9,6 +9,7 @@
module Database.Handle (
DbHandle,
+ DbConcurrency(..),
openDb,
TableName,
queryDb,
@@ -35,27 +36,49 @@ import System.IO
{- A DbHandle is a reference to a worker thread that communicates with
- the database. It has a MVar which Jobs are submitted to. -}
-data DbHandle = DbHandle (Async ()) (MVar Job)
+data DbHandle = DbHandle DbConcurrency (Async ()) (MVar Job)
{- Name of a table that should exist once the database is initialized. -}
type TableName = String
+{- Sqlite only allows a single write to a database at a time; a concurrent
+ - write will crash.
+ -
+ - While a DbHandle serializes concurrent writes from
+ - multiple threads. But, when a database can be written to by
+ - multiple processes concurrently, use MultiWriter to make writes
+ - to the database be done robustly.
+ -
+ - The downside of using MultiWriter is that after writing a change to the
+ - database, the a query using the same DbHandle will not immediately see
+ - the change! This is because the change is actually written using a
+ - separate database connection, and caching can prevent seeing the change.
+ - Also, consider that if multiple processes are writing to a database,
+ - you can't rely on seeing values you've just written anyway, as another
+ - process may change them.
+ -
+ - When a database can only be written to by a single process, use
+ - SingleWriter. Changes written to the database will always be immediately
+ - visible then.
+ -}
+data DbConcurrency = SingleWriter | MultiWriter
+
{- Opens the database, but does not perform any migrations. Only use
- - if the database is known to exist and have the right tables. -}
-openDb :: FilePath -> TableName -> IO DbHandle
-openDb db tablename = do
+ - once the database is known to exist and have the right tables. -}
+openDb :: DbConcurrency -> FilePath -> TableName -> IO DbHandle
+openDb dbconcurrency db tablename = do
jobs <- newEmptyMVar
worker <- async (workerThread (T.pack db) tablename jobs)
-- work around https://github.com/yesodweb/persistent/issues/474
liftIO $ fileEncoding stderr
- return $ DbHandle worker jobs
+ return $ DbHandle dbconcurrency worker jobs
{- This is optional; when the DbHandle gets garbage collected it will
- auto-close. -}
closeDb :: DbHandle -> IO ()
-closeDb (DbHandle worker jobs) = do
+closeDb (DbHandle _ worker jobs) = do
putMVar jobs CloseJob
wait worker
@@ -68,9 +91,12 @@ closeDb (DbHandle worker jobs) = do
- Only one action can be run at a time against a given DbHandle.
- If called concurrently in the same process, this will block until
- it is able to run.
+ -
+ - Note that when the DbHandle was opened in MultiWriter mode, recent
+ - writes may not be seen by queryDb.
-}
queryDb :: DbHandle -> SqlPersistM a -> IO a
-queryDb (DbHandle _ jobs) a = do
+queryDb (DbHandle _ _ jobs) a = do
res <- newEmptyMVar
putMVar jobs $ QueryJob $
liftIO . putMVar res =<< tryNonAsync a
@@ -79,9 +105,9 @@ queryDb (DbHandle _ jobs) a = do
{- Writes a change to the database.
-
- - If a database is opened multiple times and there's a concurrent writer,
- - the write could fail. Retries repeatedly for up to 10 seconds,
- - which should avoid all but the most exceptional problems.
+ - In MultiWriter mode, catches failure to write to the database,
+ - and retries repeatedly for up to 10 seconds, which should avoid
+ - all but the most exceptional problems.
-}
commitDb :: DbHandle -> SqlPersistM () -> IO ()
commitDb h wa = robustly Nothing 100 (commitDb' h wa)
@@ -97,15 +123,22 @@ commitDb h wa = robustly Nothing 100 (commitDb' h wa)
robustly (Just e) (n-1) a
commitDb' :: DbHandle -> SqlPersistM () -> IO (Either SomeException ())
-commitDb' (DbHandle _ jobs) a = do
+commitDb' (DbHandle MultiWriter _ jobs) a = do
res <- newEmptyMVar
- putMVar jobs $ ChangeJob $ \runner ->
+ putMVar jobs $ RobustChangeJob $ \runner ->
liftIO $ putMVar res =<< tryNonAsync (runner a)
takeMVar res
+commitDb' (DbHandle SingleWriter _ jobs) a = do
+ res <- newEmptyMVar
+ putMVar jobs $ ChangeJob $
+ liftIO . putMVar res =<< tryNonAsync a
+ takeMVar res
+ `catchNonAsync` (const $ error "sqlite commit crashed")
data Job
= QueryJob (SqlPersistM ())
- | ChangeJob ((SqlPersistM () -> IO ()) -> IO ())
+ | ChangeJob (SqlPersistM ())
+ | RobustChangeJob ((SqlPersistM () -> IO ()) -> IO ())
| CloseJob
workerThread :: T.Text -> TableName -> MVar Job -> IO ()
@@ -127,10 +160,12 @@ workerThread db tablename jobs =
Left BlockedIndefinitelyOnMVar -> return ()
Right CloseJob -> return ()
Right (QueryJob a) -> a >> loop
- -- change is run in a separate database connection
+ Right (ChangeJob a) -> a >> loop
+ -- Change is run in a separate database connection
-- since sqlite only supports a single writer at a
-- time, and it may crash the database connection
- Right (ChangeJob a) -> liftIO (a (runSqliteRobustly tablename db)) >> loop
+ -- that the write is made to.
+ Right (RobustChangeJob a) -> liftIO (a (runSqliteRobustly tablename db)) >> loop
-- like runSqlite, but calls settle on the raw sql Connection.
runSqliteRobustly :: TableName -> T.Text -> (SqlPersistM a) -> IO a
diff --git a/Database/Keys.hs b/Database/Keys.hs
index b9440ac1a..282da9f94 100644
--- a/Database/Keys.hs
+++ b/Database/Keys.hs
@@ -124,7 +124,7 @@ openDb createdb _ = catchPermissionDenied permerr $ withExclusiveLock gitAnnexKe
open db
(False, False) -> return DbUnavailable
where
- open db = liftIO $ DbOpen <$> H.openDbQueue db SQL.containedTable
+ open db = liftIO $ DbOpen <$> H.openDbQueue H.MultiWriter db SQL.containedTable
-- If permissions don't allow opening the database, treat it as if
-- it does not exist.
permerr e = case createdb of
diff --git a/Database/Queue.hs b/Database/Queue.hs
index 143871079..f0a2d2b65 100644
--- a/Database/Queue.hs
+++ b/Database/Queue.hs
@@ -9,6 +9,7 @@
module Database.Queue (
DbQueue,
+ DbConcurrency(..),
openDbQueue,
queryDbQueue,
closeDbQueue,
@@ -35,9 +36,9 @@ data DbQueue = DQ DbHandle (MVar Queue)
{- Opens the database queue, but does not perform any migrations. Only use
- if the database is known to exist and have the right tables; ie after
- running initDb. -}
-openDbQueue :: FilePath -> TableName -> IO DbQueue
-openDbQueue db tablename = DQ
- <$> openDb db tablename
+openDbQueue :: DbConcurrency -> FilePath -> TableName -> IO DbQueue
+openDbQueue dbconcurrency db tablename = DQ
+ <$> openDb dbconcurrency db tablename
<*> (newMVar =<< emptyQueue)
{- This or flushDbQueue must be called, eg at program exit to ensure
@@ -60,8 +61,11 @@ flushDbQueue (DQ hdl qvar) = do
{- Makes a query using the DbQueue's database connection.
- This should not be used to make changes to the database!
-
- - Queries will not return changes that have been recently queued,
+ - Queries will not see changes that have been recently queued,
- so use with care.
+ -
+ - Also, when the database was opened in MultiWriter mode,
+ - queries may not see changes even after flushDbQueue.
-}
queryDbQueue :: DbQueue -> SqlPersistM a -> IO a
queryDbQueue (DQ hdl _) = queryDb hdl
diff --git a/Git/Tree.hs b/Git/Tree.hs
index 3e6b85a1d..9e9b17af2 100644
--- a/Git/Tree.hs
+++ b/Git/Tree.hs
@@ -14,6 +14,7 @@ module Git.Tree (
recordTree,
TreeItem(..),
adjustTree,
+ treeMode,
) where
import Common
@@ -94,12 +95,15 @@ mkTree (MkTreeHandle cp) l = CoProcess.query cp send receive
send h = do
forM_ l $ \i -> hPutStr h $ case i of
TreeBlob f fm s -> mkTreeOutput fm BlobObject s f
- RecordedSubTree f s _ -> mkTreeOutput 0o040000 TreeObject s f
+ RecordedSubTree f s _ -> mkTreeOutput treeMode TreeObject s f
NewSubTree _ _ -> error "recordSubTree internal error; unexpected NewSubTree"
TreeCommit f fm s -> mkTreeOutput fm CommitObject s f
hPutStr h "\NUL" -- signal end of tree to --batch
receive h = getSha "mktree" (hGetLine h)
+treeMode :: FileMode
+treeMode = 0o040000
+
mkTreeOutput :: FileMode -> ObjectType -> Sha -> TopFilePath -> String
mkTreeOutput fm ot s f = concat
[ showOct fm ""
diff --git a/Logs.hs b/Logs.hs
index 716520af4..7b6c7dd20 100644
--- a/Logs.hs
+++ b/Logs.hs
@@ -42,6 +42,7 @@ topLevelUUIDBasedLogs =
, activityLog
, differenceLog
, multicastLog
+ , exportLog
]
{- All the ways to get a key from a presence log file -}
@@ -97,6 +98,9 @@ differenceLog = "difference.log"
multicastLog :: FilePath
multicastLog = "multicast.log"
+exportLog :: FilePath
+exportLog = "export.log"
+
{- The pathname of the location log file for a given key. -}
locationLogFile :: GitConfig -> Key -> String
locationLogFile config key = branchHashDir config key </> keyFile key ++ ".log"
diff --git a/Logs/Export.hs b/Logs/Export.hs
new file mode 100644
index 000000000..3ba77cd24
--- /dev/null
+++ b/Logs/Export.hs
@@ -0,0 +1,123 @@
+{- git-annex export log
+ -
+ - Copyright 2017 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Logs.Export where
+
+import qualified Data.Map as M
+
+import Annex.Common
+import qualified Annex.Branch
+import qualified Git
+import qualified Git.Branch
+import Git.Tree
+import Git.Sha
+import Git.FilePath
+import Logs
+import Logs.UUIDBased
+import Annex.UUID
+
+data Exported = Exported
+ { exportedTreeish :: Git.Ref
+ , incompleteExportedTreeish :: [Git.Ref]
+ }
+ deriving (Eq)
+
+-- | Get what's been exported to a special remote.
+--
+-- If the list contains multiple items, there was an export conflict,
+-- and different trees were exported to the same special remote.
+getExport :: UUID -> Annex [Exported]
+getExport remoteuuid = nub . mapMaybe get . M.elems . simpleMap
+ . parseLogNew parseExportLog
+ <$> Annex.Branch.get exportLog
+ where
+ get (ExportLog exported u)
+ | u == remoteuuid = Just exported
+ | otherwise = Nothing
+
+data ExportChange = ExportChange
+ { oldTreeish :: [Git.Ref]
+ , newTreeish :: Git.Ref
+ }
+
+-- | Record a change in what's exported to a special remote.
+--
+-- This is called before an export begins uploading new files to the
+-- remote, but after it's cleaned up any files that need to be deleted
+-- from the old treeish.
+--
+-- Any entries in the log for the oldTreeish will be updated to the
+-- newTreeish. This way, when multiple repositories are exporting to
+-- the same special remote, there's no conflict as long as they move
+-- forward in lock-step.
+--
+-- Also, the newTreeish is grafted into the git-annex branch. This is done
+-- to ensure that it's available later.
+recordExport :: UUID -> ExportChange -> Annex ()
+recordExport remoteuuid ec = do
+ c <- liftIO currentVectorClock
+ u <- getUUID
+ let val = ExportLog (Exported (newTreeish ec) []) remoteuuid
+ Annex.Branch.change exportLog $
+ showLogNew formatExportLog
+ . changeLog c u val
+ . M.mapWithKey (updateothers c u)
+ . parseLogNew parseExportLog
+ where
+ updateothers c u theiru le@(LogEntry _ (ExportLog exported@(Exported { exportedTreeish = t }) remoteuuid'))
+ | u == theiru || remoteuuid' /= remoteuuid || t `notElem` oldTreeish ec = le
+ | otherwise = LogEntry c (ExportLog (exported { exportedTreeish = newTreeish ec }) theiru)
+
+-- | Record the beginning of an export, to allow cleaning up from
+-- interrupted exports.
+--
+-- This is called before any changes are made to the remote.
+recordExportBeginning :: UUID -> Git.Ref -> Annex ()
+recordExportBeginning remoteuuid newtree = do
+ c <- liftIO currentVectorClock
+ u <- getUUID
+ ExportLog old _ <- fromMaybe (ExportLog (Exported emptyTree []) remoteuuid)
+ . M.lookup u . simpleMap
+ . parseLogNew parseExportLog
+ <$> Annex.Branch.get exportLog
+ let new = old { incompleteExportedTreeish = newtree:incompleteExportedTreeish old }
+ Annex.Branch.change exportLog $
+ showLogNew formatExportLog
+ . changeLog c u (ExportLog new remoteuuid)
+ . parseLogNew parseExportLog
+ graftTreeish newtree
+
+data ExportLog = ExportLog Exported UUID
+
+formatExportLog :: ExportLog -> String
+formatExportLog (ExportLog exported remoteuuid) = unwords $
+ [ Git.fromRef (exportedTreeish exported)
+ , fromUUID remoteuuid
+ ] ++ map Git.fromRef (incompleteExportedTreeish exported)
+
+parseExportLog :: String -> Maybe ExportLog
+parseExportLog s = case words s of
+ (et:u:it) -> Just $
+ ExportLog (Exported (Git.Ref et) (map Git.Ref it)) (toUUID u)
+ _ -> Nothing
+
+-- To prevent git-annex branch merge conflicts, the treeish is
+-- first grafted in and then removed in a subsequent commit.
+graftTreeish :: Git.Ref -> Annex ()
+graftTreeish treeish = do
+ branchref <- Annex.Branch.getBranch
+ Tree t <- inRepo $ getTree branchref
+ t' <- inRepo $ recordTree $ Tree $
+ RecordedSubTree (asTopFilePath graftpoint) treeish [] : t
+ commit <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit
+ "export tree" [branchref] t'
+ origtree <- inRepo $ recordTree (Tree t)
+ commit' <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit
+ "export tree cleanup" [commit] origtree
+ inRepo $ Git.Branch.update' Annex.Branch.fullname commit'
+ where
+ graftpoint = "export.tree"
diff --git a/Logs/Trust.hs b/Logs/Trust.hs
index 4f685be91..54cafc9f4 100644
--- a/Logs/Trust.hs
+++ b/Logs/Trust.hs
@@ -65,10 +65,16 @@ trustMap = maybe trustMapLoad return =<< Annex.getState Annex.trustmap
trustMapLoad :: Annex TrustMap
trustMapLoad = do
overrides <- Annex.getState Annex.forcetrust
+ l <- remoteList
+ -- Exports are never trusted, since they are not key/value stores.
+ exports <- filterM Types.Remote.isExportSupported l
+ let exportoverrides = M.fromList $
+ map (\r -> (Types.Remote.uuid r, UnTrusted)) exports
logged <- trustMapRaw
- configured <- M.fromList . catMaybes
- <$> (map configuredtrust <$> remoteList)
- let m = M.union overrides $ M.union configured logged
+ let configured = M.fromList $ mapMaybe configuredtrust l
+ let m = M.union exportoverrides $
+ M.union overrides $
+ M.union configured logged
Annex.changeState $ \s -> s { Annex.trustmap = Just m }
return m
where
diff --git a/Remote.hs b/Remote.hs
index 877c9f37d..8d826712c 100644
--- a/Remote.hs
+++ b/Remote.hs
@@ -53,6 +53,7 @@ module Remote (
checkAvailable,
isXMPPRemote,
claimingUrl,
+ isExportSupported,
) where
import Data.Ord
diff --git a/Remote/BitTorrent.hs b/Remote/BitTorrent.hs
index 2f29f5baa..37594bd11 100644
--- a/Remote/BitTorrent.hs
+++ b/Remote/BitTorrent.hs
@@ -26,6 +26,7 @@ import Backend.URL
import Annex.Perms
import Annex.UUID
import qualified Annex.Url as Url
+import Remote.Helper.Export
import Network.URI
@@ -35,12 +36,13 @@ import qualified Data.ByteString.Lazy as B
#endif
remote :: RemoteType
-remote = RemoteType {
- typename = "bittorrent",
- enumerate = list,
- generate = gen,
- setup = error "not supported"
-}
+remote = RemoteType
+ { typename = "bittorrent"
+ , enumerate = list
+ , generate = gen
+ , setup = error "not supported"
+ , exportSupported = exportUnsupported
+ }
-- There is only one bittorrent remote, and it always exists.
list :: Bool -> Annex [Git.Repo]
@@ -61,6 +63,7 @@ gen r _ c gc =
, lockContent = Nothing
, checkPresent = checkKey
, checkPresentCheap = False
+ , exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
diff --git a/Remote/Bup.hs b/Remote/Bup.hs
index 3a2d67bc8..4180cbb7d 100644
--- a/Remote/Bup.hs
+++ b/Remote/Bup.hs
@@ -25,6 +25,7 @@ import Config.Cost
import qualified Remote.Helper.Ssh as Ssh
import Remote.Helper.Special
import Remote.Helper.Messages
+import Remote.Helper.Export
import Utility.Hash
import Utility.UserInfo
import Annex.UUID
@@ -34,12 +35,13 @@ import Utility.Metered
type BupRepo = String
remote :: RemoteType
-remote = RemoteType {
- typename = "bup",
- enumerate = const (findSpecialRemotes "buprepo"),
- generate = gen,
- setup = bupSetup
-}
+remote = RemoteType
+ { typename = "bup"
+ , enumerate = const (findSpecialRemotes "buprepo")
+ , generate = gen
+ , setup = bupSetup
+ , exportSupported = exportUnsupported
+ }
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
@@ -61,6 +63,7 @@ gen r u c gc = do
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = bupLocal buprepo
+ , exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs
index 2f8c3b345..3949bf569 100644
--- a/Remote/Ddar.hs
+++ b/Remote/Ddar.hs
@@ -19,6 +19,7 @@ import qualified Git
import Config
import Config.Cost
import Remote.Helper.Special
+import Remote.Helper.Export
import Annex.Ssh
import Annex.UUID
import Utility.SshHost
@@ -29,12 +30,13 @@ data DdarRepo = DdarRepo
}
remote :: RemoteType
-remote = RemoteType {
- typename = "ddar",
- enumerate = const (findSpecialRemotes "ddarrepo"),
- generate = gen,
- setup = ddarSetup
-}
+remote = RemoteType
+ { typename = "ddar"
+ , enumerate = const (findSpecialRemotes "ddarrepo")
+ , generate = gen
+ , setup = ddarSetup
+ , exportSupported = exportUnsupported
+ }
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
@@ -60,6 +62,7 @@ gen r u c gc = do
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = ddarLocal ddarrepo
+ , exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
diff --git a/Remote/Directory.hs b/Remote/Directory.hs
index 2452c42e2..22413b7e9 100644
--- a/Remote/Directory.hs
+++ b/Remote/Directory.hs
@@ -1,6 +1,6 @@
{- A "remote" that is just a filesystem directory.
-
- - Copyright 2011-2014 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -25,18 +25,21 @@ import Config.Cost
import Config
import Utility.FileMode
import Remote.Helper.Special
+import Remote.Helper.Export
import qualified Remote.Directory.LegacyChunked as Legacy
import Annex.Content
import Annex.UUID
import Utility.Metered
+import Utility.Tmp
remote :: RemoteType
-remote = RemoteType {
- typename = "directory",
- enumerate = const (findSpecialRemotes "directory"),
- generate = gen,
- setup = directorySetup
-}
+remote = RemoteType
+ { typename = "directory"
+ , enumerate = const (findSpecialRemotes "directory")
+ , generate = gen
+ , setup = directorySetup
+ , exportSupported = exportIsSupported
+ }
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
@@ -58,6 +61,13 @@ gen r u c gc = do
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = True
+ , exportActions = ExportActions
+ { storeExport = storeExportDirectory dir
+ , retrieveExport = retrieveExportDirectory dir
+ , removeExport = removeExportDirectory dir
+ , checkPresentExport = checkPresentExportDirectory dir
+ , renameExport = renameExportDirectory dir
+ }
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
@@ -111,24 +121,21 @@ getLocation d k = do
storeDir :: FilePath -> Key -> FilePath
storeDir d k = addTrailingPathSeparator $ d </> hashDirLower def k </> keyFile k
-{- Where we store temporary data for a key, in the directory, as it's being
- - written. -}
-tmpDir :: FilePath -> Key -> FilePath
-tmpDir d k = addTrailingPathSeparator $ d </> "tmp" </> keyFile k
-
{- Check if there is enough free disk space in the remote's directory to
- store the key. Note that the unencrypted key size is checked. -}
prepareStore :: FilePath -> ChunkConfig -> Preparer Storer
-prepareStore d chunkconfig = checkPrepare checker
+prepareStore d chunkconfig = checkPrepare (checkDiskSpaceDirectory d)
(byteStorer $ store d chunkconfig)
where
- checker k = do
- annexdir <- fromRepo gitAnnexObjectDir
- samefilesystem <- liftIO $ catchDefaultIO False $
- (\a b -> deviceID a == deviceID b)
- <$> getFileStatus d
- <*> getFileStatus annexdir
- checkDiskSpace (Just d) k 0 samefilesystem
+
+checkDiskSpaceDirectory :: FilePath -> Key -> Annex Bool
+checkDiskSpaceDirectory d k = do
+ annexdir <- fromRepo gitAnnexObjectDir
+ samefilesystem <- liftIO $ catchDefaultIO False $
+ (\a b -> deviceID a == deviceID b)
+ <$> getFileStatus d
+ <*> getFileStatus annexdir
+ checkDiskSpace (Just d) k 0 samefilesystem
store :: FilePath -> ChunkConfig -> Key -> L.ByteString -> MeterUpdate -> Annex Bool
store d chunkconfig k b p = liftIO $ do
@@ -141,7 +148,7 @@ store d chunkconfig k b p = liftIO $ do
finalizeStoreGeneric tmpdir destdir
return True
where
- tmpdir = tmpDir d k
+ tmpdir = addTrailingPathSeparator $ d </> "tmp" </> keyFile k
destdir = storeDir d k
{- Passed a temp directory that contains the files that should be placed
@@ -211,11 +218,66 @@ removeDirGeneric topdir dir = do
checkKey :: FilePath -> ChunkConfig -> CheckPresent
checkKey d (LegacyChunks _) k = Legacy.checkKey d locations k
-checkKey d _ k = liftIO $
- ifM (anyM doesFileExist (locations d k))
+checkKey d _ k = checkPresentGeneric d (locations d k)
+
+checkPresentGeneric :: FilePath -> [FilePath] -> Annex Bool
+checkPresentGeneric d ps = liftIO $
+ ifM (anyM doesFileExist ps)
( return True
, ifM (doesDirectoryExist d)
( return False
, giveup $ "directory " ++ d ++ " is not accessible"
)
)
+
+storeExportDirectory :: FilePath -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool
+storeExportDirectory d src _k loc p = liftIO $ catchBoolIO $ do
+ createDirectoryIfMissing True (takeDirectory dest)
+ -- Write via temp file so that checkPresentGeneric will not
+ -- see it until it's fully stored.
+ viaTmp (\tmp () -> withMeteredFile src p (L.writeFile tmp)) dest ()
+ return True
+ where
+ dest = exportPath d loc
+
+retrieveExportDirectory :: FilePath -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex (Bool, Verification)
+retrieveExportDirectory d _k loc dest p = unVerified $ liftIO $ catchBoolIO $ do
+ withMeteredFile src p (L.writeFile dest)
+ return True
+ where
+ src = exportPath d loc
+
+removeExportDirectory :: FilePath -> Key -> ExportLocation -> Annex Bool
+removeExportDirectory d _k loc = liftIO $ do
+ nukeFile src
+ removeExportLocation d loc
+ return True
+ where
+ src = exportPath d loc
+
+checkPresentExportDirectory :: FilePath -> Key -> ExportLocation -> Annex Bool
+checkPresentExportDirectory d _k loc =
+ checkPresentGeneric d [exportPath d loc]
+
+renameExportDirectory :: FilePath -> Key -> ExportLocation -> ExportLocation -> Annex Bool
+renameExportDirectory d _k oldloc newloc = liftIO $ catchBoolIO $ do
+ createDirectoryIfMissing True (takeDirectory dest)
+ renameFile src dest
+ removeExportLocation d oldloc
+ return True
+ where
+ src = exportPath d oldloc
+ dest = exportPath d newloc
+
+exportPath :: FilePath -> ExportLocation -> FilePath
+exportPath d (ExportLocation loc) = d </> loc
+
+{- Removes the ExportLocation directory and its parents, so long as
+ - they're empty, up to but not including the topdir. -}
+removeExportLocation :: FilePath -> ExportLocation -> IO ()
+removeExportLocation topdir (ExportLocation loc) = go (Just loc) (Right ())
+ where
+ go _ (Left _e) = return ()
+ go Nothing _ = return ()
+ go (Just loc') _ = go (upFrom loc')
+ =<< tryIO (removeDirectory $ exportPath topdir (ExportLocation loc'))
diff --git a/Remote/External.hs b/Remote/External.hs
index 32b95e9bb..71a07d3ea 100644
--- a/Remote/External.hs
+++ b/Remote/External.hs
@@ -18,6 +18,7 @@ import Config
import Git.Config (isTrue, boolConfig)
import Git.Env
import Remote.Helper.Special
+import Remote.Helper.Export
import Remote.Helper.ReadOnly
import Remote.Helper.Messages
import Utility.Metered
@@ -39,12 +40,13 @@ import System.Log.Logger (debugM)
import qualified Data.Map as M
remote :: RemoteType
-remote = RemoteType {
- typename = "external",
- enumerate = const (findSpecialRemotes "externaltype"),
- generate = gen,
- setup = externalSetup
-}
+remote = RemoteType
+ { typename = "external"
+ , enumerate = const (findSpecialRemotes "externaltype")
+ , generate = gen
+ , setup = externalSetup
+ , exportSupported = exportUnsupported
+ }
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc
@@ -85,6 +87,7 @@ gen r u c gc
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = False
+ , exportActions = exportUnsupported
, whereisKey = towhereis
, remoteFsck = Nothing
, repairRepo = Nothing
diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs
index 2ccc47ad8..3270a1dc7 100644
--- a/Remote/GCrypt.hs
+++ b/Remote/GCrypt.hs
@@ -38,6 +38,7 @@ import Remote.Helper.Git
import Remote.Helper.Encryptable
import Remote.Helper.Special
import Remote.Helper.Messages
+import Remote.Helper.Export
import qualified Remote.Helper.Ssh as Ssh
import Utility.Metered
import Annex.UUID
@@ -51,14 +52,15 @@ import Utility.Gpg
import Utility.SshHost
remote :: RemoteType
-remote = RemoteType {
- typename = "gcrypt",
+remote = RemoteType
+ { typename = "gcrypt"
-- Remote.Git takes care of enumerating gcrypt remotes too,
-- and will call our gen on them.
- enumerate = const (return []),
- generate = gen,
- setup = gCryptSetup
-}
+ , enumerate = const (return [])
+ , generate = gen
+ , setup = gCryptSetup
+ , exportSupported = exportUnsupported
+ }
chainGen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
chainGen gcryptr u c gc = do
@@ -114,6 +116,7 @@ gen' r u c gc = do
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = repoCheap r
+ , exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
diff --git a/Remote/Git.hs b/Remote/Git.hs
index b48b48b52..02957fda2 100644
--- a/Remote/Git.hs
+++ b/Remote/Git.hs
@@ -50,6 +50,7 @@ import Utility.Batch
import Utility.SimpleProtocol
import Remote.Helper.Git
import Remote.Helper.Messages
+import Remote.Helper.Export
import qualified Remote.Helper.Ssh as Ssh
import qualified Remote.GCrypt
import qualified Remote.P2P
@@ -66,12 +67,13 @@ import qualified Data.Map as M
import Network.URI
remote :: RemoteType
-remote = RemoteType {
- typename = "git",
- enumerate = list,
- generate = gen,
- setup = gitSetup
-}
+remote = RemoteType
+ { typename = "git"
+ , enumerate = list
+ , generate = gen
+ , setup = gitSetup
+ , exportSupported = exportUnsupported
+ }
list :: Bool -> Annex [Git.Repo]
list autoinit = do
@@ -110,7 +112,7 @@ gitSetup Init mu _ c _ = do
if isNothing mu || mu == Just u
then return (c, u)
else error "git remote did not have specified uuid"
-gitSetup Enable (Just u) _ c _ = do
+gitSetup (Enable _) (Just u) _ c _ = do
inRepo $ Git.Command.run
[ Param "remote"
, Param "add"
@@ -118,7 +120,7 @@ gitSetup Enable (Just u) _ c _ = do
, Param $ fromMaybe (giveup "no location") (M.lookup "location" c)
]
return (c, u)
-gitSetup Enable Nothing _ _ _ = error "unable to enable git remote with no specified uuid"
+gitSetup (Enable _) Nothing _ _ _ = error "unable to enable git remote with no specified uuid"
{- It's assumed to be cheap to read the config of non-URL remotes, so this is
- done each time git-annex is run in a way that uses remotes.
@@ -157,6 +159,7 @@ gen r u c gc
, lockContent = Just (lockKey new)
, checkPresent = inAnnex new
, checkPresentCheap = repoCheap r
+ , exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = if Git.repoIsUrl r
then Nothing
diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs
index c2f9bcf12..40a92c700 100644
--- a/Remote/Glacier.hs
+++ b/Remote/Glacier.hs
@@ -18,6 +18,7 @@ import Config
import Config.Cost
import Remote.Helper.Special
import Remote.Helper.Messages
+import Remote.Helper.Export
import qualified Remote.Helper.AWS as AWS
import Creds
import Utility.Metered
@@ -29,12 +30,13 @@ type Vault = String
type Archive = FilePath
remote :: RemoteType
-remote = RemoteType {
- typename = "glacier",
- enumerate = const (findSpecialRemotes "glacier"),
- generate = gen,
- setup = glacierSetup
-}
+remote = RemoteType
+ { typename = "glacier"
+ , enumerate = const (findSpecialRemotes "glacier")
+ , generate = gen
+ , setup = glacierSetup
+ , exportSupported = exportUnsupported
+ }
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost
@@ -57,6 +59,7 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = False
+ , exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
@@ -87,8 +90,9 @@ glacierSetup' ss u mcreds c gc = do
(c', encsetup) <- encryptionSetup c gc
c'' <- setRemoteCredPair encsetup c' gc (AWS.creds u) mcreds
let fullconfig = c'' `M.union` defaults
- when (ss == Init) $
- genVault fullconfig gc u
+ case ss of
+ Init -> genVault fullconfig gc u
+ _ -> return ()
gitConfigSpecialRemote u fullconfig "glacier" "true"
return (fullconfig, u)
where
diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs
index 1fe6d75be..97e55a415 100644
--- a/Remote/Helper/Encryptable.hs
+++ b/Remote/Helper/Encryptable.hs
@@ -15,6 +15,7 @@ module Remote.Helper.Encryptable (
embedCreds,
cipherKey,
extractCipher,
+ isEncrypted,
describeEncryption,
) where
@@ -57,7 +58,7 @@ encryptionSetup c gc = do
encryption = M.lookup "encryption" c
-- Generate a new cipher, depending on the chosen encryption scheme
genCipher cmd = case encryption of
- _ | M.member "cipher" c || M.member "cipherkeys" c || M.member "pubkeys" c -> cannotchange
+ _ | hasEncryptionConfig c -> cannotchange
Just "none" -> return (c, NoEncryption)
Just "shared" -> encsetup $ genSharedCipher cmd
-- hybrid encryption is the default when a keyid is
@@ -167,6 +168,15 @@ extractCipher c = case (M.lookup "cipher" c,
where
readkeys = KeyIds . splitc ','
+isEncrypted :: RemoteConfig -> Bool
+isEncrypted c = case M.lookup "encryption" c of
+ Just "none" -> False
+ Just _ -> True
+ Nothing -> hasEncryptionConfig c
+
+hasEncryptionConfig :: RemoteConfig -> Bool
+hasEncryptionConfig c = M.member "cipher" c || M.member "cipherkeys" c || M.member "pubkeys" c
+
describeEncryption :: RemoteConfig -> String
describeEncryption c = case extractCipher c of
Nothing -> "none"
diff --git a/Remote/Helper/Export.hs b/Remote/Helper/Export.hs
new file mode 100644
index 000000000..58533155b
--- /dev/null
+++ b/Remote/Helper/Export.hs
@@ -0,0 +1,126 @@
+{- exports to remotes
+ -
+ - Copyright 2017 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE FlexibleInstances #-}
+
+module Remote.Helper.Export where
+
+import Annex.Common
+import Types.Remote
+import Types.Backend
+import Types.Key
+import Backend
+import Remote.Helper.Encryptable (isEncrypted)
+import Database.Export
+
+import qualified Data.Map as M
+
+-- | Use for remotes that do not support exports.
+class HasExportUnsupported a where
+ exportUnsupported :: a
+
+instance HasExportUnsupported (RemoteConfig -> RemoteGitConfig -> Annex Bool) where
+ exportUnsupported = \_ _ -> return False
+
+instance HasExportUnsupported (ExportActions Annex) where
+ exportUnsupported = ExportActions
+ { storeExport = \_ _ _ _ -> return False
+ , retrieveExport = \_ _ _ _ -> return (False, UnVerified)
+ , removeExport = \_ _ -> return False
+ , checkPresentExport = \_ _ -> return False
+ , renameExport = \_ _ _ -> return False
+ }
+
+exportIsSupported :: RemoteConfig -> RemoteGitConfig -> Annex Bool
+exportIsSupported = \_ _ -> return True
+
+-- | Prevent or allow exporttree=yes when setting up a new remote,
+-- depending on exportSupported and other configuration.
+adjustExportableRemoteType :: RemoteType -> RemoteType
+adjustExportableRemoteType rt = rt { setup = setup' }
+ where
+ setup' st mu cp c gc = do
+ let cont = setup rt st mu cp c gc
+ ifM (exportSupported rt c gc)
+ ( case st of
+ Init -> case M.lookup "exporttree" c of
+ Just "yes" | isEncrypted c ->
+ giveup "cannot enable both encryption and exporttree"
+ _ -> cont
+ Enable oldc
+ | M.lookup "exporttree" c /= M.lookup "exporttree" oldc ->
+ giveup "cannot change exporttree of existing special remote"
+ | otherwise -> cont
+ , case M.lookup "exporttree" c of
+ Just "yes" -> giveup "exporttree=yes is not supported by this special remote"
+ _ -> cont
+ )
+
+-- | If the remote is exportSupported, and exporttree=yes, adjust the
+-- remote to be an export.
+adjustExportable :: Remote -> Annex Remote
+adjustExportable r = case M.lookup "exporttree" (config r) of
+ Just "yes" -> ifM (isExportSupported r)
+ ( isexport
+ , notexport
+ )
+ _ -> notexport
+ where
+ notexport = return $ r { exportActions = exportUnsupported }
+ isexport = do
+ db <- openDb (uuid r)
+ return $ r
+ -- Storing a key on an export would need a way to
+ -- look up the file(s) that the currently exported
+ -- tree uses for a key; there's not currently an
+ -- inexpensive way to do that (getExportLocation
+ -- only finds files that have been stored on the
+ -- export already).
+ { storeKey = \_ _ _ -> do
+ warning "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
+ return False
+ -- Keys can be retrieved, but since an export
+ -- is not a true key/value store, the content of
+ -- the key has to be able to be strongly verified.
+ , retrieveKeyFile = \k _af dest p ->
+ if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (keyVariety k))
+ then do
+ locs <- liftIO $ getExportLocation db k
+ case locs of
+ [] -> do
+ warning "unknown export location"
+ return (False, UnVerified)
+ (l:_) -> retrieveExport (exportActions r) k l dest p
+ else do
+ warning $ "exported content cannot be verified due to using the " ++ formatKeyVariety (keyVariety k) ++ " backend"
+ return (False, UnVerified)
+ , retrieveKeyFileCheap = \_ _ _ -> return False
+ -- Remove all files a key was exported to.
+ , removeKey = \k -> do
+ locs <- liftIO $ getExportLocation db k
+ oks <- forM locs $ \loc -> do
+ ok <- removeExport (exportActions r) k loc
+ when ok $
+ liftIO $ removeExportLocation db k loc
+ return ok
+ liftIO $ flushDbQueue db
+ return (and oks)
+ -- Can't lock content on exports, since they're
+ -- not key/value stores, and someone else could
+ -- change what's exported to a file at any time.
+ , lockContent = Nothing
+ -- Check if any of the files a key was exported
+ -- to are present. This doesn't guarantee the
+ -- export contains the right content.
+ , checkPresent = \k ->
+ anyM (checkPresentExport (exportActions r) k)
+ =<< liftIO (getExportLocation db k)
+ , mkUnavailable = return Nothing
+ , getInfo = do
+ is <- getInfo r
+ return (is++[("export", "yes")])
+ }
diff --git a/Remote/Hook.hs b/Remote/Hook.hs
index 0ebbf9139..d7c7eb6b8 100644
--- a/Remote/Hook.hs
+++ b/Remote/Hook.hs
@@ -16,6 +16,7 @@ import Config.Cost
import Annex.UUID
import Remote.Helper.Special
import Remote.Helper.Messages
+import Remote.Helper.Export
import Utility.Env
import Messages.Progress
@@ -25,12 +26,13 @@ type Action = String
type HookName = String
remote :: RemoteType
-remote = RemoteType {
- typename = "hook",
- enumerate = const (findSpecialRemotes "hooktype"),
- generate = gen,
- setup = hookSetup
-}
+remote = RemoteType
+ { typename = "hook"
+ , enumerate = const (findSpecialRemotes "hooktype")
+ , generate = gen
+ , setup = hookSetup
+ , exportSupported = exportUnsupported
+ }
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
@@ -51,6 +53,7 @@ gen r u c gc = do
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = False
+ , exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
diff --git a/Remote/List.hs b/Remote/List.hs
index a5e305622..2dc5e4823 100644
--- a/Remote/List.hs
+++ b/Remote/List.hs
@@ -18,6 +18,7 @@ import Types.Remote
import Annex.UUID
import Remote.Helper.Hooks
import Remote.Helper.ReadOnly
+import Remote.Helper.Export
import qualified Git
import qualified Git.Config
@@ -42,7 +43,7 @@ import qualified Remote.Hook
import qualified Remote.External
remoteTypes :: [RemoteType]
-remoteTypes =
+remoteTypes = map adjustExportableRemoteType
[ Remote.Git.remote
, Remote.GCrypt.remote
, Remote.P2P.remote
@@ -100,8 +101,9 @@ remoteGen m t r = do
u <- getRepoUUID r
gc <- Annex.getRemoteGitConfig r
let c = fromMaybe M.empty $ M.lookup u m
- mrmt <- generate t r u c gc
- return $ adjustReadOnly . addHooks <$> mrmt
+ generate t r u c gc >>= maybe
+ (return Nothing)
+ (Just <$$> adjustExportable . adjustReadOnly . addHooks)
{- Updates a local git Remote, re-reading its git config. -}
updateRemote :: Remote -> Annex (Maybe Remote)
diff --git a/Remote/P2P.hs b/Remote/P2P.hs
index 118262b3c..be0d4589f 100644
--- a/Remote/P2P.hs
+++ b/Remote/P2P.hs
@@ -24,6 +24,7 @@ import Annex.UUID
import Config
import Config.Cost
import Remote.Helper.Git
+import Remote.Helper.Export
import Messages.Progress
import Utility.Metered
import Utility.AuthToken
@@ -33,14 +34,15 @@ import Control.Concurrent
import Control.Concurrent.STM
remote :: RemoteType
-remote = RemoteType {
- typename = "p2p",
+remote = RemoteType
+ { typename = "p2p"
-- Remote.Git takes care of enumerating P2P remotes,
-- and will call chainGen on them.
- enumerate = const (return []),
- generate = \_ _ _ _ -> return Nothing,
- setup = error "P2P remotes are set up using git-annex p2p"
-}
+ , enumerate = const (return [])
+ , generate = \_ _ _ _ -> return Nothing
+ , setup = error "P2P remotes are set up using git-annex p2p"
+ , exportSupported = exportUnsupported
+ }
chainGen :: P2PAddress -> Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
chainGen addr r u c gc = do
@@ -57,6 +59,7 @@ chainGen addr r u c gc = do
, lockContent = Just (lock u addr connpool)
, checkPresent = checkpresent u addr connpool
, checkPresentCheap = False
+ , exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs
index 4fc55d725..79aebad6b 100644
--- a/Remote/Rsync.hs
+++ b/Remote/Rsync.hs
@@ -28,6 +28,7 @@ import Annex.UUID
import Annex.Ssh
import Remote.Helper.Special
import Remote.Helper.Messages
+import Remote.Helper.Export
import Remote.Rsync.RsyncUrl
import Crypto
import Utility.Rsync
@@ -43,12 +44,13 @@ import Utility.SshHost
import qualified Data.Map as M
remote :: RemoteType
-remote = RemoteType {
- typename = "rsync",
- enumerate = const (findSpecialRemotes "rsyncurl"),
- generate = gen,
- setup = rsyncSetup
-}
+remote = RemoteType
+ { typename = "rsync"
+ , enumerate = const (findSpecialRemotes "rsyncurl")
+ , generate = gen
+ , setup = rsyncSetup
+ , exportSupported = exportUnsupported
+ }
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
@@ -73,6 +75,7 @@ gen r u c gc = do
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = False
+ , exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
diff --git a/Remote/S3.hs b/Remote/S3.hs
index c05831b0b..4b56cce29 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -39,6 +39,7 @@ import Config.Cost
import Remote.Helper.Special
import Remote.Helper.Http
import Remote.Helper.Messages
+import Remote.Helper.Export
import qualified Remote.Helper.AWS as AWS
import Creds
import Annex.UUID
@@ -53,12 +54,13 @@ import Utility.Url (checkBoth, managerSettings, closeManager)
type BucketName = String
remote :: RemoteType
-remote = RemoteType {
- typename = "S3",
- enumerate = const (findSpecialRemotes "s3"),
- generate = gen,
- setup = s3Setup
-}
+remote = RemoteType
+ { typename = "S3"
+ , enumerate = const (findSpecialRemotes "s3")
+ , generate = gen
+ , setup = s3Setup
+ , exportSupported = exportUnsupported
+ }
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
@@ -84,6 +86,7 @@ gen r u c gc = do
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = False
+ , exportActions = exportUnsupported
, whereisKey = Just (getWebUrls info)
, remoteFsck = Nothing
, repairRepo = Nothing
@@ -127,8 +130,9 @@ s3Setup' ss u mcreds c gc
(c', encsetup) <- encryptionSetup c gc
c'' <- setRemoteCredPair encsetup c' gc (AWS.creds u) mcreds
let fullconfig = c'' `M.union` defaults
- when (ss == Init) $
- genBucket fullconfig gc u
+ case ss of
+ Init -> genBucket fullconfig gc u
+ _ -> return ()
use fullconfig
archiveorg = do
diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs
index e4686f2f2..d3d52d7de 100644
--- a/Remote/Tahoe.hs
+++ b/Remote/Tahoe.hs
@@ -34,6 +34,7 @@ import qualified Git
import Config
import Config.Cost
import Remote.Helper.Special
+import Remote.Helper.Export
import Annex.UUID
import Annex.Content
import Logs.RemoteState
@@ -51,12 +52,13 @@ type IntroducerFurl = String
type Capability = String
remote :: RemoteType
-remote = RemoteType {
- typename = "tahoe",
- enumerate = const (findSpecialRemotes "tahoe"),
- generate = gen,
- setup = tahoeSetup
-}
+remote = RemoteType
+ { typename = "tahoe"
+ , enumerate = const (findSpecialRemotes "tahoe")
+ , generate = gen
+ , setup = tahoeSetup
+ , exportSupported = exportUnsupported
+ }
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
@@ -75,6 +77,7 @@ gen r u c gc = do
, lockContent = Nothing
, checkPresent = checkKey u hdl
, checkPresentCheap = False
+ , exportActions = exportUnsupported
, whereisKey = Just (getWhereisKey u)
, remoteFsck = Nothing
, repairRepo = Nothing
diff --git a/Remote/Web.hs b/Remote/Web.hs
index be2f265e0..f3580ca99 100644
--- a/Remote/Web.hs
+++ b/Remote/Web.hs
@@ -10,6 +10,7 @@ module Remote.Web (remote, getWebUrls) where
import Annex.Common
import Types.Remote
import Remote.Helper.Messages
+import Remote.Helper.Export
import qualified Git
import qualified Git.Construct
import Annex.Content
@@ -22,12 +23,13 @@ import Annex.Quvi
import qualified Utility.Quvi as Quvi
remote :: RemoteType
-remote = RemoteType {
- typename = "web",
- enumerate = list,
- generate = gen,
- setup = error "not supported"
-}
+remote = RemoteType
+ { typename = "web"
+ , enumerate = list
+ , generate = gen
+ , setup = error "not supported"
+ , exportSupported = exportUnsupported
+ }
-- There is only one web remote, and it always exists.
-- (If the web should cease to exist, remove this module and redistribute
@@ -50,6 +52,7 @@ gen r _ c gc =
, lockContent = Nothing
, checkPresent = checkKey
, checkPresentCheap = False
+ , exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs
index 2c4d24c35..4cc3c92e0 100644
--- a/Remote/WebDAV.hs
+++ b/Remote/WebDAV.hs
@@ -28,6 +28,7 @@ import Config.Cost
import Remote.Helper.Special
import Remote.Helper.Messages
import Remote.Helper.Http
+import Remote.Helper.Export
import qualified Remote.Helper.Chunked.Legacy as Legacy
import Creds
import Utility.Metered
@@ -40,12 +41,13 @@ import Network.HTTP.Client (HttpExceptionContent(..), responseStatus)
#endif
remote :: RemoteType
-remote = RemoteType {
- typename = "webdav",
- enumerate = const (findSpecialRemotes "webdav"),
- generate = gen,
- setup = webdavSetup
-}
+remote = RemoteType
+ { typename = "webdav"
+ , enumerate = const (findSpecialRemotes "webdav")
+ , generate = gen
+ , setup = webdavSetup
+ , exportSupported = exportUnsupported
+ }
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = new <$> remoteCost gc expensiveRemoteCost
@@ -68,6 +70,7 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = False
+ , exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
diff --git a/Types/Remote.hs b/Types/Remote.hs
index bd75840b3..e2f36a55b 100644
--- a/Types/Remote.hs
+++ b/Types/Remote.hs
@@ -2,7 +2,7 @@
-
- Most things should not need this, using Types instead
-
- - Copyright 2011-2014 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -18,6 +18,9 @@ module Types.Remote
, Availability(..)
, Verification(..)
, unVerified
+ , ExportLocation(..)
+ , isExportSupported
+ , ExportActions(..)
)
where
@@ -34,7 +37,7 @@ import Types.UrlContents
import Types.NumCopies
import Config.Cost
import Utility.Metered
-import Git.Types
+import Git.Types (RemoteName)
import Utility.SafeCommand
import Utility.Url
@@ -42,92 +45,96 @@ type RemoteConfigKey = String
type RemoteConfig = M.Map RemoteConfigKey String
-data SetupStage = Init | Enable
- deriving (Eq)
+data SetupStage = Init | Enable RemoteConfig
{- There are different types of remotes. -}
-data RemoteTypeA a = RemoteType {
+data RemoteTypeA a = RemoteType
-- human visible type name
- typename :: String,
+ { typename :: String
-- enumerates remotes of this type
-- The Bool is True if automatic initialization of remotes is desired
- enumerate :: Bool -> a [Git.Repo],
- -- generates a remote of this type
- generate :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> a (Maybe (RemoteA a)),
+ , enumerate :: Bool -> a [Git.Repo]
+ -- generates a remote of this type from the current git config
+ , generate :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> a (Maybe (RemoteA a))
-- initializes or enables a remote
- setup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> a (RemoteConfig, UUID)
-}
+ , setup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> a (RemoteConfig, UUID)
+ -- check if a remote of this type is able to support export
+ , exportSupported :: RemoteConfig -> RemoteGitConfig -> a Bool
+ }
instance Eq (RemoteTypeA a) where
x == y = typename x == typename y
{- An individual remote. -}
-data RemoteA a = Remote {
+data RemoteA a = Remote
-- each Remote has a unique uuid
- uuid :: UUID,
+ { uuid :: UUID
-- each Remote has a human visible name
- name :: RemoteName,
+ , name :: RemoteName
-- Remotes have a use cost; higher is more expensive
- cost :: Cost,
+ , cost :: Cost
+
-- Transfers a key's contents from disk to the remote.
-- The key should not appear to be present on the remote until
-- all of its contents have been transferred.
- storeKey :: Key -> AssociatedFile -> MeterUpdate -> a Bool,
+ , storeKey :: Key -> AssociatedFile -> MeterUpdate -> a Bool
-- Retrieves a key's contents to a file.
-- (The MeterUpdate does not need to be used if it writes
-- sequentially to the file.)
- retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> a (Bool, Verification),
+ , retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> a (Bool, Verification)
-- Retrieves a key's contents to a tmp file, if it can be done cheaply.
-- It's ok to create a symlink or hardlink.
- retrieveKeyFileCheap :: Key -> AssociatedFile -> FilePath -> a Bool,
+ , retrieveKeyFileCheap :: Key -> AssociatedFile -> FilePath -> a Bool
-- Removes a key's contents (succeeds if the contents are not present)
- removeKey :: Key -> a Bool,
+ , removeKey :: Key -> a Bool
-- Uses locking to prevent removal of a key's contents,
-- thus producing a VerifiedCopy, which is passed to the callback.
-- If unable to lock, does not run the callback, and throws an
-- error.
-- This is optional; remotes do not have to support locking.
- lockContent :: forall r. Maybe (Key -> (VerifiedCopy -> a r) -> a r),
+ , lockContent :: forall r. Maybe (Key -> (VerifiedCopy -> a r) -> a r)
-- Checks if a key is present in the remote.
-- Throws an exception if the remote cannot be accessed.
- checkPresent :: Key -> a Bool,
+ , checkPresent :: Key -> a Bool
-- Some remotes can checkPresent without an expensive network
-- operation.
- checkPresentCheap :: Bool,
+ , checkPresentCheap :: Bool
+ -- Some remotes support exports of trees.
+ , exportActions :: ExportActions a
-- Some remotes can provide additional details for whereis.
- whereisKey :: Maybe (Key -> a [String]),
+ , whereisKey :: Maybe (Key -> a [String])
-- Some remotes can run a fsck operation on the remote,
-- without transferring all the data to the local repo
-- The parameters are passed to the fsck command on the remote.
- remoteFsck :: Maybe ([CommandParam] -> a (IO Bool)),
+ , remoteFsck :: Maybe ([CommandParam] -> a (IO Bool))
-- Runs an action to repair the remote's git repository.
- repairRepo :: Maybe (a Bool -> a (IO Bool)),
+ , repairRepo :: Maybe (a Bool -> a (IO Bool))
-- a Remote has a persistent configuration store
- config :: RemoteConfig,
+ , config :: RemoteConfig
-- git repo for the Remote
- repo :: Git.Repo,
+ , repo :: Git.Repo
-- a Remote's configuration from git
- gitconfig :: RemoteGitConfig,
+ , gitconfig :: RemoteGitConfig
-- a Remote can be assocated with a specific local filesystem path
- localpath :: Maybe FilePath,
+ , localpath :: Maybe FilePath
-- a Remote can be known to be readonly
- readonly :: Bool,
+ , readonly :: Bool
-- a Remote can be globally available. (Ie, "in the cloud".)
- availability :: Availability,
+ , availability :: Availability
-- the type of the remote
- remotetype :: RemoteTypeA a,
+ , remotetype :: RemoteTypeA a
-- For testing, makes a version of this remote that is not
-- available for use. All its actions should fail.
- mkUnavailable :: a (Maybe (RemoteA a)),
+ , mkUnavailable :: a (Maybe (RemoteA a))
-- Information about the remote, for git annex info to display.
- getInfo :: a [(String, String)],
+ , getInfo :: a [(String, String)]
-- Some remotes can download from an url (or uri).
- claimUrl :: Maybe (URLString -> a Bool),
+ , claimUrl :: Maybe (URLString -> a Bool)
-- Checks that the url is accessible, and gets information about
-- its contents, without downloading the full content.
-- Throws an exception if the url is inaccessible.
- checkUrl :: Maybe (URLString -> a UrlContents)
-}
+ , checkUrl :: Maybe (URLString -> a UrlContents)
+ }
instance Show (RemoteA a) where
show remote = "Remote { name =\"" ++ name remote ++ "\" }"
@@ -150,3 +157,33 @@ unVerified :: Monad m => m Bool -> m (Bool, Verification)
unVerified a = do
ok <- a
return (ok, UnVerified)
+
+-- A location on a remote that a key can be exported to.
+-- The FilePath will be relative, and may contain unix-style path
+-- separators.
+newtype ExportLocation = ExportLocation FilePath
+ deriving (Show, Eq)
+
+isExportSupported :: RemoteA a -> a Bool
+isExportSupported r = exportSupported (remotetype r) (config r) (gitconfig r)
+
+data ExportActions a = ExportActions
+ -- Exports content to an ExportLocation.
+ -- The exported file should not appear to be present on the remote
+ -- until all of its contents have been transferred.
+ { storeExport :: FilePath -> Key -> ExportLocation -> MeterUpdate -> a Bool
+ -- Retrieves exported content to a file.
+ -- (The MeterUpdate does not need to be used if it writes
+ -- sequentially to the file.)
+ , retrieveExport :: Key -> ExportLocation -> FilePath -> MeterUpdate -> a (Bool, Verification)
+ -- Removes an exported file (succeeds if the contents are not present)
+ , removeExport :: Key -> ExportLocation -> a Bool
+ -- Checks if anything is exported to the remote at the specified
+ -- ExportLocation.
+ -- Throws an exception if the remote cannot be accessed.
+ , checkPresentExport :: Key -> ExportLocation -> a Bool
+ -- Renames an already exported file.
+ -- This may fail, if the file doesn't exist, or the remote does not
+ -- support renames.
+ , renameExport :: Key -> ExportLocation -> ExportLocation -> a Bool
+ }
diff --git a/Types/TrustLevel.hs b/Types/TrustLevel.hs
index 1cc4c662e..6ec18e512 100644
--- a/Types/TrustLevel.hs
+++ b/Types/TrustLevel.hs
@@ -21,7 +21,7 @@ import Types.UUID
-- This order may seem backwards, but we generally want to list dead
-- remotes last and trusted ones first.
data TrustLevel = Trusted | SemiTrusted | UnTrusted | DeadTrusted
- deriving (Eq, Enum, Ord, Bounded)
+ deriving (Eq, Enum, Ord, Bounded, Show)
instance Default TrustLevel where
def = SemiTrusted
diff --git a/Utility/Tmp.hs b/Utility/Tmp.hs
index 6a541cfe4..ca611e0b4 100644
--- a/Utility/Tmp.hs
+++ b/Utility/Tmp.hs
@@ -28,7 +28,7 @@ type Template = String
{- Runs an action like writeFile, writing to a temp file first and
- then moving it into place. The temp file is stored in the same
- directory as the final file to avoid cross-device renames. -}
-viaTmp :: (MonadMask m, MonadIO m) => (FilePath -> String -> m ()) -> FilePath -> String -> m ()
+viaTmp :: (MonadMask m, MonadIO m) => (FilePath -> v -> m ()) -> FilePath -> v -> m ()
viaTmp a file content = bracketIO setup cleanup use
where
(dir, base) = splitFileName file
diff --git a/doc/design/exporting_trees_to_special_remotes.mdwn b/doc/design/exporting_trees_to_special_remotes.mdwn
index ce7431141..6e7cc68db 100644
--- a/doc/design/exporting_trees_to_special_remotes.mdwn
+++ b/doc/design/exporting_trees_to_special_remotes.mdwn
@@ -15,13 +15,13 @@ when they want to export a tree. (It would also be possible to drop all content
from an existing special remote and reuse it, but there does not seem much
benefit in doing so.)
-Add a new `initremote` configuration `exporttree=true`, that cannot be
+Add a new `initremote` configuration `exporttree=yes`, that cannot be
changed by `enableremote`:
- git annex initremote myexport type=... exporttree=true
+ git annex initremote myexport type=... exporttree=yes
-It does not make sense to encrypt an export, so exporttree=true requires
-(and can even imply) encryption=false.
+It does not make sense to encrypt an export, so exporttree=yes requires
+encryption=none.
Note that the particular tree to export is not specified yet. This is
because the tree that is exported to a special remote may change.
@@ -69,11 +69,6 @@ To efficiently update an export, git-annex can diff the tree
that was exported with the new tree. The naive approach is to upload
new and modified files and remove deleted files.
-Note that a file may have been partially uploaded to an export, and then
-the export updated to a tree without that file. So, need to try to delete
-all removed files, even if location tracking does not say that the special
-remote contains them.
-
With rename detection, if the special remote supports moving files,
more efficient updates can be done. It gets complicated; consider two files
that swap names.
@@ -81,33 +76,6 @@ that swap names.
If the special remote supports copying files, that would also make some
updates more efficient.
-## resuming exports
-
-Resuming an interrupted export needs to work well.
-
-There are two cases here:
-
-1. Some of the files in the tree have been uploaded; others have not.
-2. A file has been partially uploaded.
-
-These two cases need to be disentangled somehow in order to handle
-them. One way is to use the location log as follows:
-
-* Before a file is uploaded, look up what key is currently exported
- using that filename. If there is one, update the location log,
- saying it's not present in the special remote.
-* Upload the file.
-* Update the location log for the newly exported key.
-
-Note that this method does not allow resuming a partial upload by appending to
-a file, because we don't know if the file actually started to be uploaded, or
-if the file instead still has the old key's content. Instead, the whole
-file needs to be re-uploaded.
-
-Alternative: Keep an index file that's the current state of the export.
-See comment #4 of [[todo/export]]. Not sure if that works? Perhaps it
-would be overkill if it's only used to support resuming partial uploads.
-
## changes to special remote interface
This needs some additional methods added to special remotes, and to
@@ -115,6 +83,10 @@ the [[external_special_remote_protocol]].
Here's the changes to the latter:
+* `EXPORTSUPPORTED`
+ Used to check if a special remote supports exports. The remote
+ responds with either `EXPORTSUPPORTED-SUCCESS` or
+ `EXPORTSUPPORTED-FAILURE`
* `EXPORT Name`
Comes immediately before each of the following requests,
specifying the name of the exported file. It will be in the form
@@ -123,6 +95,9 @@ Here's the changes to the latter:
* `TRANSFEREXPORT STORE|RETRIEVE Key File`
Requests the transfer of a File on local disk to or from the previously
provided Name on the special remote.
+ Note that it's important that, while a file is being stored,
+ CHECKPRESENTEXPORT not indicate it's present until all the data has
+ been transferred.
The remote responds with either `TRANSFER-SUCCESS` or
`TRANSFER-FAILURE`, and a remote where exports do not make sense
may always fail.
@@ -139,9 +114,8 @@ Here's the changes to the latter:
* `RENAMEEXPORT Key NewName`
Requests the remote rename a file stored on it from the previously
provided Name to the NewName.
- The remote responds with `RENAMEEXPORT-SUCCESS`,
- `RENAMEEXPORT-FAILURE`, or with `RENAMEEXPORT-UNSUPPORTED` if an efficient
- rename cannot be done.
+ The remote responds with `RENAMEEXPORT-SUCCESS` or with
+ `RENAMEEXPORT-FAILURE` if an efficient rename cannot be done.
To support old external special remote programs that have not been updated
to support exports, git-annex will need to handle an `ERROR` response
@@ -162,19 +136,19 @@ key/value stores. The content of a file can change, and if multiple
repositories can export a special remote, they can be out of sync about
what files are exported to it.
-To avoid such problems, when updating an exported file on a special remote,
-the key could be recorded there too. But, this would have to be done
-atomically, and checked atomically when downloading the file. Special
-remotes lack atomicity guarantees for file storage, let alone for file
-retrieval.
-
-Possible solution: Make exporttree=true cause the special remote to
+Possible solution: Make exporttree=yes cause the special remote to
be untrusted, and rely on annex.verify to catch cases where the content
of a file on a special remote has changed. This would work well enough
except for when the WORM or URL backend is used. So, prevent the user
from exporting such keys. Also, force verification on for such special
remotes, don't let it be turned off.
+The same file contents may be in a treeish multiple times under different
+filenames. That complicates using location tracking. One file may have been
+exported and the other not, and location tracking says that the content
+is present in the export. A sqlite database is needed to keep track of
+this.
+
## recording exported filenames in git-annex branch
In order to download the content of a key from a file exported
@@ -229,10 +203,101 @@ In this case, git-annex knows both exported trees. Have the user provide
a tree that resolves the conflict as they desire (it could be the same as
one of the exported trees, or some merge of them or an entirely new tree).
The UI to do this can just be another `git annex export $tree --to remote`.
-To resolve, diff each exported tree in turn against the resolving tree. If a
-file differs, re-export that file. In some cases this will do unncessary
-re-uploads, but it's reasonably efficient.
+To resolve, diff each exported tree in turn against the resolving tree
+and delete all files that differ. Then, upload all missing files.
+
+## when to update export.log for efficient resuming of exports
+
+When should `export.log` be updated? Possibilities:
+
+* Before performing any work, to set the goal.
+* After the export is fully successful, to record the current state.
+* After some mid-point.
+
+Lots of things could go wrong during an export. A file might fail to be
+transferred or only part of it be transferred; a file's content might not
+be present to transfer at all. The export could be interrupted part way.
+Updating the export.log at the right point in time is important to handle
+these cases efficiently.
+
+If the export.log is updated first, then it's only a goal and does not tell
+us what's been done already.
+
+If the export.log is updated only after complete success, then the common
+case of some files not having content locally present will prevent it from
+being updated. When we resume, we again don't know what's been done
+already.
+
+If the export.log is updated after deleting any files from the
+remote that are not the same in the new treeish as in the old treeish,
+and as long as TRANSFEREXPORT STORE is atomic, then when resuming we can
+trust CHECKPRESENTEXPORT to only find files that have the correct content
+for the current treeish. (Unless a conflicting export was made from
+elsewhere, but in that case, the conflict resolution will have to fix up
+later.)
+
+## handling renames efficiently
+
+To handle two files that swap names, a temp name is required.
+
+Difficulty with a temp name is picking a name that won't ever be used by
+any exported file.
+
+Interrupted exports also complicate this. While a name could be picked that
+is in neither the old nor the new tree, an export could be interrupted,
+leaving the file at the temp name. There needs to be something to clean
+that up when the export is resumed, even if it's resumed with a different
+tree.
-The documentation should suggest strongly only exporting to a given special
-remote from a single repository, or having some other rule that avoids
-export conflicts.
+Could use something like ".git-annex-tmp-content-$key" as the temp name.
+This hides it from casual view, which is good, and it's not depedent on the
+tree, so no state needs to be maintained to clean it up. Also, using the
+key in the name simplifies calculation of complicated renames (eg, renaming
+A to B, B to C, C to A)
+
+Export can first try to rename all files that are deleted/modified
+to their key's temp name (falling back to deleting since not all
+special remotes support rename), and then, in a second pass, rename
+from the temp name to the new name. Followed by deleting the temp name
+of all keys whose files are deleted in the diff. That is more renames and
+deletes than strictly necessary, but it will statelessly clean up
+an interruped export as long as it's run again with the same new tree.
+
+But, an export of tree B should clean up after
+an interrupted export of tree A. Some state is needed to handle this.
+Before starting the export of tree A, record it somewhere. Then when
+resuming, diff A..B, and delete the temp names of the keys in the
+diff. (Can't rename here, because we don't know what was the content
+of a file when an export was interrupted.)
+
+So, before an export does anything, need to record the tree that's about
+to be exported to export.log, not as an exported tree, but as a goal.
+Then on resume, the temp files for that can be cleaned up.
+
+## renames and export conflicts
+
+What is there's an export conflict going on at the same time that a file
+in the export gets renamed?
+
+Suppose that there are two git repos A and B, each exporting to the same
+remote. A and B are not currently communicating. A exports T1 which
+contains F. B exports T2, which has a different content for F.
+
+Then A exports T3, which renames F to G. If that rename is done
+on the remote, then A will think it's successfully exported T3,
+but G will have F's content from T2, not from T1.
+
+When A and B reconnect, the export conflict will be detected.
+To resolve the export conflict, it says above to:
+
+> To resolve, diff each exported tree in turn against the resolving tree
+> and delete all files that differ. Then, upload all missing files.
+
+Assume that the resolving tree is T3. So B's export of T2 is diffed against
+T3. F differs and is deleted (no change). G differs and is deleted,
+which fixes up the problem that the wrong content was renamed to G.
+G is missing so gets uploaded.
+
+So, this works, as long as "delete all files that differ" means it
+deletes both old and new files. And as long as conflict resolution does not
+itself stash away files in the temp name for later renaming.
diff --git a/doc/design/external_special_remote_protocol.mdwn b/doc/design/external_special_remote_protocol.mdwn
index 87a838bd4..8a34bb2d7 100644
--- a/doc/design/external_special_remote_protocol.mdwn
+++ b/doc/design/external_special_remote_protocol.mdwn
@@ -43,7 +43,8 @@ the version of the protocol it is using.
Once it knows the version, git-annex will generally
send a message telling the special remote to start up.
-(Or it might send a INITREMOTE, so don't hardcode this order.)
+(Or it might send an INITREMOTE or EXPORTSUPPORTED,
+so don't hardcode this order.)
PREPARE
@@ -102,7 +103,7 @@ The following requests *must* all be supported by the special remote.
So any one-time setup tasks should be done idempotently.
* `PREPARE`
Tells the remote that it's time to prepare itself to be used.
- Only INITREMOTE can come before this.
+ Only INITREMOTE or EXPORTSUPPORTED can come before this.
* `TRANSFER STORE|RETRIEVE Key File`
Requests the transfer of a key. For STORE, the File is the file to upload;
for RETRIEVE the File is where to store the download.
@@ -143,6 +144,46 @@ replying with `UNSUPPORTED-REQUEST` is acceptable.
network access.
This is not needed when `SETURIPRESENT` is used, since such uris are
automatically displayed by `git annex whereis`.
+* `EXPORTSUPPORTED`
+ Used to check if a special remote supports exports. The remote
+ responds with either `EXPORTSUPPORTED-SUCCESS` or
+ `EXPORTSUPPORTED-FAILURE`. Note that this request may be made before
+ or after `PREPARE`.
+* `EXPORT Name`
+ Comes immediately before each of the following export-related requests,
+ specifying the name of the exported file. It will be in the form
+ of a relative path, and may contain path separators, whitespace,
+ and other special characters.
+* `TRANSFEREXPORT STORE|RETRIEVE Key File`
+ Requests the transfer of a File on local disk to or from the previously
+ provided Name on the special remote.
+ Note that it's important that, while a file is being stored,
+ CHECKPRESENTEXPORT not indicate it's present until all the data has
+ been transferred.
+ The remote responds with either `TRANSFER-SUCCESS` or
+ `TRANSFER-FAILURE`, and a remote where exports do not make sense
+ may always fail.
+* `CHECKPRESENTEXPORT Key`
+ Requests the remote to check if the previously provided Name is present
+ in it.
+ The remote responds with `CHECKPRESENT-SUCCESS`, `CHECKPRESENT-FAILURE`,
+ or `CHECKPRESENT-UNKNOWN`.
+* `REMOVEEXPORT Key`
+ Requests the remote to remove content stored by `TRANSFEREXPORT`
+ with the previously provided Name.
+ The remote responds with either `REMOVE-SUCCESS` or
+ `REMOVE-FAILURE`.
+ If the content was already not present in the remote, it should
+ respond with `REMOVE-SUCCESS`.
+* `RENAMEEXPORT Key NewName`
+ Requests the remote rename a file stored on it from the previously
+ provided Name to the NewName.
+ The remote responds with `RENAMEEXPORT-SUCCESS` or
+ `RENAMEEXPORT-FAILURE`.
+
+To support old external special remote programs that have not been updated
+to support exports, git-annex will need to handle an `ERROR` response
+when using any of the above.
More optional requests may be added, without changing the protocol version,
so if an unknown request is seen, reply with `UNSUPPORTED-REQUEST`.
@@ -210,6 +251,15 @@ while it's handling a request.
stored in the special remote.
* `WHEREIS-FAILURE`
Indicates that no location is known for a key.
+* `EXPORTSUPPORTED-SUCCESS`
+ Indicates that it makes sense to use this special remote as an export.
+* `EXPORTSUPPORTED`
+ Indicates that it does not make sense to use this special remote as an
+ export.
+* `RENAMEEXPORT-SUCCESS`
+ Indicates that a `RENAMEEXPORT` was done successfully.
+* `RENAMEEXPORT-FAILURE`
+ Indicates that a `RENAMEEXPORT` failed for whatever reason.
* `UNSUPPORTED-REQUEST`
Indicates that the special remote does not know how to handle a request.
diff --git a/doc/git-annex-export.mdwn b/doc/git-annex-export.mdwn
new file mode 100644
index 000000000..72319a8fc
--- /dev/null
+++ b/doc/git-annex-export.mdwn
@@ -0,0 +1,64 @@
+# NAME
+
+git-annex export - export content to a remote
+
+# SYNOPSIS
+
+git annex export `treeish --to remote`
+
+# DESCRIPTION
+
+Use this command to export a tree of files from a git-annex repository.
+
+Normally files are stored on a git-annex special remote named by their
+keys. That is great for reliable data storage, but your filenames are
+obscured. Exporting replicates the tree to the special remote as-is.
+
+Mixing key/value storage and exports in the same remote would be a mess and
+so is not allowed. You have to configure a special remote with
+`exporttree=yes` when initially setting it up with
+[[git-annex-initremote]](1).
+
+Repeated exports are done efficiently, by diffing the old and new tree,
+and transferring only the changed files.
+
+Exports can be interrupted and resumed. However, partially uploaded files
+will be re-started from the beginning.
+
+Once content has been exported to a remote, commands like `git annex get`
+can download content from there the same as from other remotes. However,
+since an export is not a key/value store, git-annex has to do more
+verification of content downloaded from an export. Some types of keys,
+that are not based on checksums, cannot be downloaded from an export.
+And, git-annex will never trust an export to retain the content of a key.
+
+# EXPORT CONFLICTS
+
+If two different git-annex repositories are both exporting different trees
+to the same special remote, it's possible for an export conflict to occur.
+This leaves the special remote with some files from one tree, and some
+files from the other. Files in the special remote may have entirely the
+wrong content as well.
+
+It's not possible for git-annex to detect when making an export will result
+in an export conflict. The best way to avoid export conflicts is to either
+only ever export to a special remote from a single repository, or to have a
+rule about the tree that you export to the special remote. For example, if
+you always export origin/master after pushing to origin, then an export
+conflict can't happen.
+
+An export conflict can only be detected after the two git repositories
+that produced it get back in sync. Then the next time you run `git annex
+export`, it will detect the export conflict, and resolve it.
+
+# SEE ALSO
+
+[[git-annex]](1)
+
+[[git-annex-initremote]](1)
+
+# AUTHOR
+
+Joey Hess <id@joeyh.name>
+
+Warning: Automatically converted into a man page by mdwn2man. Edit with care.
diff --git a/doc/git-annex-import.mdwn b/doc/git-annex-import.mdwn
index 22b3c3941..3684505b6 100644
--- a/doc/git-annex-import.mdwn
+++ b/doc/git-annex-import.mdwn
@@ -96,6 +96,8 @@ instead of to the annex.
[[git-annex-add]](1)
+[[git-annex-export]](1)
+
# AUTHOR
Joey Hess <id@joeyh.name>
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 14a787219..544baafa1 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -158,6 +158,12 @@ subdirectories).
See [[git-annex-importfeed]](1) for details.
+* `export treeish --to remote`
+
+ Export content to a remote.
+
+ See [[git-annex-export]](1) for details.
+
* `undo [filename|directory] ...`
Undo last change to a file or directory.
diff --git a/doc/internals.mdwn b/doc/internals.mdwn
index 7d39b1068..ccf1e09b6 100644
--- a/doc/internals.mdwn
+++ b/doc/internals.mdwn
@@ -185,10 +185,23 @@ content expression.
Tracks what trees have been exported to special remotes by
[[git-annex-export]](1).
-Each line starts with a timestamp, then the uuid of the special remote,
-followed by the sha1 of the tree that was exported to that special remote.
+Each line starts with a timestamp, then the uuid of the repository
+that exported to the special remote, followed by the sha1 of the tree
+that was exported, and then by the uuid of the special remote.
-(The exported tree is also grafted into the git-annex branch, at
+There can also be subsequent sha1s, of trees that have started to be
+exported but whose export is not yet complete. The sha1 of the exported
+tree can be the empty tree (4b825dc642cb6eb9a060e54bf8d69288fbee4904)
+in order to record the beginning of the first export.
+
+For example:
+
+ 1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 4b825dc642cb6eb9a060e54bf8d69288fbee4904 26339d22-446b-11e0-9101-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b
+ 1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55
+ 1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55 7c7af825782b7c8706039b855c72709993542be4
+ 1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 26339d22-446b-11e0-9101-002170d25c55
+
+(The trees are also grafted into the git-annex branch, at
`export.tree`, to prevent git from garbage collecting it. However, the head
of the git-annex branch should never contain such a grafted in tree;
the grafted tree is removed in the same commit that updates `export.log`.)
diff --git a/doc/special_remotes/directory.mdwn b/doc/special_remotes/directory.mdwn
index 5584f31f3..70610c66d 100644
--- a/doc/special_remotes/directory.mdwn
+++ b/doc/special_remotes/directory.mdwn
@@ -31,6 +31,10 @@ remote:
Do not use for new remotes. It is not safe to change the chunksize
setting of an existing remote.
+* `exporttree` - Set to "yes" to make this special remote usable
+ by [[git-annex-export]]. It will not be usable as a general-purpose
+ special remote.
+
Setup example:
# git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none
diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn
index e729b0cf1..c4e57bd1c 100644
--- a/doc/todo/export.mdwn
+++ b/doc/todo/export.mdwn
@@ -14,3 +14,15 @@ Would this be able to reuse the existing `storeKey` interface, or would
there need to be a new interface in supported remotes?
--[[Joey]]
+
+Work is in progress. Todo list:
+
+* `git annex get --from export` works in the repo that exported to it,
+ but in another repo, the export db won't be populated, so it won't work.
+ Maybe just show a useful error message in this case?
+ However, exporting from one repository and then trying to update the
+ export from another repository also doesn't work right, because the
+ export database is not populated. So, seems that the export database needs
+ to get populated based on the export log in these cases.
+* Support export to aditional special remotes (S3 etc)
+* Support export to external special remotes.
diff --git a/git-annex.cabal b/git-annex.cabal
index 16b6bda27..af31207bd 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -787,6 +787,7 @@ Executable git-annex
Config.GitConfig
Creds
Crypto
+ Database.Export
Database.Fsck
Database.Handle
Database.Init
@@ -849,6 +850,7 @@ Executable git-annex
Logs.Config
Logs.Difference
Logs.Difference.Pure
+ Logs.Export
Logs.FsckResults
Logs.Group
Logs.Line
@@ -901,6 +903,7 @@ Executable git-annex
Remote.Helper.Chunked
Remote.Helper.Chunked.Legacy
Remote.Helper.Encryptable
+ Remote.Helper.Export
Remote.Helper.Git
Remote.Helper.Hooks
Remote.Helper.Http