summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2017-08-31 13:29:54 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2017-08-31 13:33:50 -0400
commit36b222bf699023f3e460c8bc231b2916aa27ab5c (patch)
treedc538a3362a263f5fee1eedbfe39fa32b88a27d1
parenta46a3ce0208f4471368a4070ebd12835375491a5 (diff)
resuming exports
Make a pass over the whole exported tree, and upload anything that has not yet reached the export. Update location log when exporting. Note that the synthesized keys for non-annexed files are stored in the location log too. Some cases involving files in the tree with the same content are not handled correctly yet. This commit was sponsored by Boyd Stephen Smith Jr. on Patreon.
-rw-r--r--Command/Export.hs111
-rw-r--r--doc/design/exporting_trees_to_special_remotes.mdwn5
-rw-r--r--doc/todo/export.mdwn10
3 files changed, 92 insertions, 34 deletions
diff --git a/Command/Export.hs b/Command/Export.hs
index a2632857a..aba8a1877 100644
--- a/Command/Export.hs
+++ b/Command/Export.hs
@@ -10,6 +10,8 @@ module Command.Export where
import Command
import qualified Git
import qualified Git.DiffTree
+import qualified Git.LsTree
+import Git.Types
import Git.Sha
import Git.FilePath
import Types.Key
@@ -41,69 +43,110 @@ optParser _ = ExportOptions
( metavar paramTreeish
)
+-- An export includes both annexed files and files stored in git.
+-- For the latter, a SHA1 key is synthesized.
+data ExportKey = AnnexKey Key | GitKey Key
+
+asKey :: ExportKey -> Key
+asKey (AnnexKey k) = k
+asKey (GitKey k) = k
+
+exportKey :: Git.Sha -> Annex ExportKey
+exportKey sha = mk <$> catKey sha
+ where
+ mk (Just k) = AnnexKey k
+ mk Nothing = GitKey $ Key
+ { keyName = show sha
+ , keyVariety = SHA1Key (HasExt False)
+ , keySize = Nothing
+ , keyMtime = Nothing
+ , keyChunkSize = Nothing
+ , keyChunkNum = Nothing
+ }
+
seek :: ExportOptions -> CommandSeek
seek o = do
r <- getParsed (exportRemote o)
let oldtreeish = emptyTree -- XXX temporary
+
+ -- First, diff the old and new trees and update all changed
+ -- files in the export.
(diff, cleanup) <- inRepo $
Git.DiffTree.diffTreeRecursive oldtreeish (exportTreeish o)
- seekActions $ pure $ map (start r) diff
+ seekActions $ pure $ map (startDiff r) diff
void $ liftIO cleanup
-start :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart
-start r diff
+ -- In case a previous export was incomplete, make a pass
+ -- over the whole tree and export anything that is not
+ -- yet exported.
+ (l, cleanup') <- inRepo $ Git.LsTree.lsTree (exportTreeish o)
+ seekActions $ pure $ map (start r) l
+ void $ liftIO cleanup'
+
+startDiff :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart
+startDiff r diff
| Git.DiffTree.dstsha diff == nullSha = do
showStart "unexport" f
- oldk <- either id id <$> exportKey (Git.DiffTree.srcsha diff)
+ oldk <- exportKey (Git.DiffTree.srcsha diff)
next $ performUnexport r oldk loc
| otherwise = do
showStart "export" f
- next $ performExport r diff loc
+ k <- exportKey (Git.DiffTree.dstsha diff)
+ next $ performExport r k (Git.DiffTree.dstsha diff) loc
where
loc = ExportLocation $ toInternalGitPath $
getTopFilePath $ Git.DiffTree.file diff
f = getTopFilePath $ Git.DiffTree.file diff
-performExport :: Remote -> Git.DiffTree.DiffTreeItem -> ExportLocation -> CommandPerform
-performExport r diff loc = case storeExport r of
+start :: Remote -> Git.LsTree.TreeItem -> CommandStart
+start r ti = do
+ ek <- exportKey (Git.LsTree.sha ti)
+ stopUnless (elem (uuid r) <$> loggedLocations (asKey ek)) $
+ next $ performExport r ek (Git.LsTree.sha ti) loc
+ where
+ loc = ExportLocation $ toInternalGitPath $
+ getTopFilePath $ Git.LsTree.file ti
+
+performExport :: Remote -> ExportKey -> Sha -> ExportLocation -> CommandPerform
+performExport r ek contentsha loc = case storeExport r of
Nothing -> error "remote does not support exporting files"
- Just storer -> next $ do
- v <- exportKey (Git.DiffTree.dstsha diff)
- case v of
- Right k -> ifM (inAnnex k)
- ( metered Nothing k $ \m ->
- sendAnnex k
- (void $ performUnexport r k loc)
+ Just storer -> do
+ sent <- case ek of
+ AnnexKey k -> ifM (inAnnex k)
+ ( metered Nothing k $ \m -> do
+ let rollback = void $ performUnexport r ek loc
+ sendAnnex k rollback
(\f -> storer f k loc m)
, do
showNote "not available"
return False
)
-- Sending a non-annexed file.
- Left sha1k -> metered Nothing sha1k $ \m ->
+ GitKey sha1k -> metered Nothing sha1k $ \m ->
withTmpFile "export" $ \tmp h -> do
- b <- catObject (Git.DiffTree.dstsha diff)
+ b <- catObject contentsha
liftIO $ L.hPut h b
liftIO $ hClose h
storer tmp sha1k loc m
+ if sent
+ then next $ cleanupExport r ek
+ else stop
+
+cleanupExport :: Remote -> ExportKey -> CommandCleanup
+cleanupExport r ek = do
+ logChange (asKey ek) (uuid r) InfoPresent
+ return True
-performUnexport :: Remote -> Key -> ExportLocation -> CommandPerform
-performUnexport r k loc = case removeExport r of
+performUnexport :: Remote -> ExportKey -> ExportLocation -> CommandPerform
+performUnexport r ek loc = case removeExport r of
Nothing -> error "remote does not support removing exported files"
- Just remover -> next $ remover k loc
+ Just remover -> do
+ ok <- remover (asKey ek) loc
+ if ok
+ then next $ cleanupUnexport r ek
+ else stop
--- When the Sha points to an annexed file, get the key as Right.
--- When the Sha points to a non-annexed file, convert to a SHA1 key,
--- as Left.
-exportKey :: Git.Sha -> Annex (Either Key Key)
-exportKey sha = mk <$> catKey sha
- where
- mk (Just k) = Right k
- mk Nothing = Left $ Key
- { keyName = show sha
- , keyVariety = SHA1Key (HasExt False)
- , keySize = Nothing
- , keyMtime = Nothing
- , keyChunkSize = Nothing
- , keyChunkNum = Nothing
- }
+cleanupUnexport :: Remote -> ExportKey -> CommandCleanup
+cleanupUnexport r ek = do
+ logChange (asKey ek) (uuid r) InfoMissing
+ return True
diff --git a/doc/design/exporting_trees_to_special_remotes.mdwn b/doc/design/exporting_trees_to_special_remotes.mdwn
index ce7431141..c9b2b72e5 100644
--- a/doc/design/exporting_trees_to_special_remotes.mdwn
+++ b/doc/design/exporting_trees_to_special_remotes.mdwn
@@ -175,6 +175,11 @@ except for when the WORM or URL backend is used. So, prevent the user
from exporting such keys. Also, force verification on for such special
remotes, don't let it be turned off.
+The same file contents may be in a treeish multiple times under different
+filenames. That complicates using location tracking. One file may have been
+exported and the other not, and location tracking says that the content
+is present in the export.
+
## recording exported filenames in git-annex branch
In order to download the content of a key from a file exported
diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn
index f589e953d..354dc84e7 100644
--- a/doc/todo/export.mdwn
+++ b/doc/todo/export.mdwn
@@ -27,3 +27,13 @@ Work is in progress. Todo list:
* Detect export conflicts (see design)
* Support export to aditional special remotes (S3 etc)
* Support export to external special remotes.
+* If the same content is present in two different files, export
+ location tracking can be messed up.
+
+ When one of the files is deleted and
+ that tree is exported, the location log for the key will be updated
+ to say it's not present, even though the other file is still present.
+
+ And, once one of the files is uploaded, the location log will
+ say the content is present, so the pass over the tree won't try to
+ upload the other file.