summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2017-02-09 15:40:44 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2017-02-09 15:41:00 -0400
commit1a71d33190a0744f4416be176c944ad938fa17ee (patch)
tree389fd88359d2680dc0216306b65f23869d67b2fa
parent8346e57d14c3f6547499188d8a8ae9dc6739e6b5 (diff)
import --reinject-duplicates
This is the same as running git-annex reinject --known, followed by git-annex import. The advantage of having it in one command is that each file only has to be hashed once; running the two commands separately hashes the imported files a second time. This commit was sponsored by Shane-o on Patreon.
-rw-r--r--CHANGELOG1
-rw-r--r--Command/Import.hs15
-rw-r--r--Command/Reinject.hs25
-rw-r--r--doc/git-annex-import.mdwn13
-rw-r--r--doc/todo/Alternative_mode_control_for_import/comment_4_767dfbaf72de52bd5fbe4c37add5bd91._comment15
5 files changed, 50 insertions, 19 deletions
diff --git a/CHANGELOG b/CHANGELOG
index e348014a6..321f98c52 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -46,6 +46,7 @@ git-annex (6.20170102) UNRELEASED; urgency=medium
* import: --deduplicate and --skip-duplicates were implemented
inefficiently; they unnecessarily hashed each file twice. They have
been improved to only hash once.
+ * import: Added --reinject-duplicates.
-- Joey Hess <id@joeyh.name> Fri, 06 Jan 2017 15:22:06 -0400
diff --git a/Command/Import.hs b/Command/Import.hs
index ea2ec71e4..6d872b1cb 100644
--- a/Command/Import.hs
+++ b/Command/Import.hs
@@ -11,6 +11,7 @@ import Command
import qualified Git
import qualified Annex
import qualified Command.Add
+import qualified Command.Reinject
import Utility.CopyFile
import Backend
import Types.KeySource
@@ -28,7 +29,7 @@ cmd = withGlobalOptions (jobsOption : jsonOption : fileMatchingOptions) $ notBar
"move and add files from outside git working copy"
paramPaths (seek <$$> optParser)
-data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates
+data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates | ReinjectDuplicates
deriving (Eq)
data ImportOptions = ImportOptions
@@ -57,7 +58,11 @@ duplicateModeParser =
)
<|> flag' SkipDuplicates
( long "skip-duplicates"
- <> help "import only new files"
+ <> help "import only new files (do not delete source files)"
+ )
+ <|> flag' ReinjectDuplicates
+ ( long "reinject-duplicates"
+ <> help "import new files, and reinject the content of files that were imported before"
)
seek :: ImportOptions -> CommandSeek
@@ -88,6 +93,9 @@ start largematcher mode (srcfile, destfile) =
warning "Could not verify that the content is still present in the annex; not removing from the import location."
stop
)
+ reinject k = do
+ showNote "reinjecting"
+ Command.Reinject.perform srcfile k
importfile ld k = checkdestdir $ do
ignored <- not <$> Annex.getState Annex.force <&&> checkIgnored destfile
if ignored
@@ -184,6 +192,9 @@ start largematcher mode (srcfile, destfile) =
SkipDuplicates -> checkdup k
(skipbecause "duplicate")
(importfile ld k)
+ ReinjectDuplicates -> checkdup k
+ (reinject k)
+ (importfile ld k)
_ -> importfile ld k
skipbecause s = showNote (s ++ "; skipping") >> next (return True)
diff --git a/Command/Reinject.hs b/Command/Reinject.hs
index 7d2da9420..8fe7587fa 100644
--- a/Command/Reinject.hs
+++ b/Command/Reinject.hs
@@ -43,9 +43,12 @@ startSrcDest (src:dest:[])
| src == dest = stop
| otherwise = notAnnexed src $ do
showStart "reinject" dest
- next $ ifAnnexed dest
- (\key -> perform src key (verifyKeyContent DefaultVerify UnVerified key src))
- stop
+ next $ ifAnnexed dest go stop
+ where
+ go key = ifM (verifyKeyContent DefaultVerify UnVerified key src)
+ ( perform src key
+ , error "failed"
+ )
startSrcDest _ = giveup "specify a src file and a dest file"
startKnown :: FilePath -> CommandStart
@@ -55,7 +58,7 @@ startKnown src = notAnnexed src $ do
case mkb of
Nothing -> error "Failed to generate key"
Just (key, _) -> ifM (isKnownKey key)
- ( next $ perform src key (return True)
+ ( next $ perform src key
, do
warning "Not known content; skipping"
next $ next $ return True
@@ -65,19 +68,15 @@ notAnnexed :: FilePath -> CommandStart -> CommandStart
notAnnexed src = ifAnnexed src $
giveup $ "cannot used annexed file as src: " ++ src
-perform :: FilePath -> Key -> Annex Bool -> CommandPerform
-perform src key verify = ifM move
+perform :: FilePath -> Key -> CommandPerform
+perform src key = ifM move
( next $ cleanup key
, error "failed"
)
where
- move = checkDiskSpaceToGet key False $
- ifM verify
- ( do
- moveAnnex key src
- return True
- , return False
- )
+ move = checkDiskSpaceToGet key False $ do
+ moveAnnex key src
+ return True
cleanup :: Key -> CommandCleanup
cleanup key = do
diff --git a/doc/git-annex-import.mdwn b/doc/git-annex-import.mdwn
index 92519fce6..22b3c3941 100644
--- a/doc/git-annex-import.mdwn
+++ b/doc/git-annex-import.mdwn
@@ -33,10 +33,9 @@ Several options can be used to adjust handling of duplicate files.
Do not delete files from the import location.
- This could allow importing the same files repeatedly
- to different locations in a repository. More likely, it could be used to
- import the same files to a number of different branches or separate git
- repositories.
+ Running with this option repeatedly can import the same files into
+ different git repositories, or branches, or different locations in a git
+ repository.
* `--deduplicate`
@@ -53,6 +52,12 @@ Several options can be used to adjust handling of duplicate files.
Does not import any files, but any files found in the import location
that are duplicates are deleted.
+* `--reinject-duplicates`
+
+ Imports files that are not duplicates. Files that are duplicates have
+ their content reinjected into the annex (similar to
+ [[git-annex-reinject]]).
+
* `--force`
Allow existing files to be overwritten by newly imported files.
diff --git a/doc/todo/Alternative_mode_control_for_import/comment_4_767dfbaf72de52bd5fbe4c37add5bd91._comment b/doc/todo/Alternative_mode_control_for_import/comment_4_767dfbaf72de52bd5fbe4c37add5bd91._comment
new file mode 100644
index 000000000..70c49cf79
--- /dev/null
+++ b/doc/todo/Alternative_mode_control_for_import/comment_4_767dfbaf72de52bd5fbe4c37add5bd91._comment
@@ -0,0 +1,15 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 4"""
+ date="2017-02-09T19:33:46Z"
+ content="""
+Actually, import --deduplicate, --skip-duplicates, --clean-duplicates
+are implemented naively and do hash files twice. So it's
+the same efficiency.
+
+But, I just finished a more complicated implementation that avoids
+the second hashing.
+
+That does make the combined action worth adding, I suppose. Done so as
+--reinject-duplicates.
+"""]]