diff options
author | Joey Hess <joeyh@joeyh.name> | 2017-02-09 15:40:44 -0400 |
---|---|---|
committer | Joey Hess <joeyh@joeyh.name> | 2017-02-09 15:41:00 -0400 |
commit | 1a71d33190a0744f4416be176c944ad938fa17ee (patch) | |
tree | 389fd88359d2680dc0216306b65f23869d67b2fa | |
parent | 8346e57d14c3f6547499188d8a8ae9dc6739e6b5 (diff) |
import --reinject-duplicates
This is the same as running git-annex reinject --known, followed by
git-annex import. The advantage to having it in one command is that it
only has to hash each file once; when the two commands are run separately,
the imported files have to be hashed a second time.
This commit was sponsored by Shane-o on Patreon.
-rw-r--r-- | CHANGELOG | 1 | ||||
-rw-r--r-- | Command/Import.hs | 15 | ||||
-rw-r--r-- | Command/Reinject.hs | 25 | ||||
-rw-r--r-- | doc/git-annex-import.mdwn | 13 | ||||
-rw-r--r-- | doc/todo/Alternative_mode_control_for_import/comment_4_767dfbaf72de52bd5fbe4c37add5bd91._comment | 15 |
5 files changed, 50 insertions, 19 deletions
@@ -46,6 +46,7 @@ git-annex (6.20170102) UNRELEASED; urgency=medium * import: --deduplicate and --skip-duplicates were implemented inneficiently; they unncessarily hashed each file twice. They have been improved to only hash once. + * import: Added --reinject-duplicates. -- Joey Hess <id@joeyh.name> Fri, 06 Jan 2017 15:22:06 -0400 diff --git a/Command/Import.hs b/Command/Import.hs index ea2ec71e4..6d872b1cb 100644 --- a/Command/Import.hs +++ b/Command/Import.hs @@ -11,6 +11,7 @@ import Command import qualified Git import qualified Annex import qualified Command.Add +import qualified Command.Reinject import Utility.CopyFile import Backend import Types.KeySource @@ -28,7 +29,7 @@ cmd = withGlobalOptions (jobsOption : jsonOption : fileMatchingOptions) $ notBar "move and add files from outside git working copy" paramPaths (seek <$$> optParser) -data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates +data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates | ReinjectDuplicates deriving (Eq) data ImportOptions = ImportOptions @@ -57,7 +58,11 @@ duplicateModeParser = ) <|> flag' SkipDuplicates ( long "skip-duplicates" - <> help "import only new files" + <> help "import only new files (do not delete source files)" + ) + <|> flag' ReinjectDuplicates + ( long "reinject-duplicates" + <> help "import new files, and reinject the content of files that were imported before" ) seek :: ImportOptions -> CommandSeek @@ -88,6 +93,9 @@ start largematcher mode (srcfile, destfile) = warning "Could not verify that the content is still present in the annex; not removing from the import location." 
stop ) + reinject k = do + showNote "reinjecting" + Command.Reinject.perform srcfile k importfile ld k = checkdestdir $ do ignored <- not <$> Annex.getState Annex.force <&&> checkIgnored destfile if ignored @@ -184,6 +192,9 @@ start largematcher mode (srcfile, destfile) = SkipDuplicates -> checkdup k (skipbecause "duplicate") (importfile ld k) + ReinjectDuplicates -> checkdup k + (reinject k) + (importfile ld k) _ -> importfile ld k skipbecause s = showNote (s ++ "; skipping") >> next (return True) diff --git a/Command/Reinject.hs b/Command/Reinject.hs index 7d2da9420..8fe7587fa 100644 --- a/Command/Reinject.hs +++ b/Command/Reinject.hs @@ -43,9 +43,12 @@ startSrcDest (src:dest:[]) | src == dest = stop | otherwise = notAnnexed src $ do showStart "reinject" dest - next $ ifAnnexed dest - (\key -> perform src key (verifyKeyContent DefaultVerify UnVerified key src)) - stop + next $ ifAnnexed dest go stop + where + go key = ifM (verifyKeyContent DefaultVerify UnVerified key src) + ( perform src key + , error "failed" + ) startSrcDest _ = giveup "specify a src file and a dest file" startKnown :: FilePath -> CommandStart @@ -55,7 +58,7 @@ startKnown src = notAnnexed src $ do case mkb of Nothing -> error "Failed to generate key" Just (key, _) -> ifM (isKnownKey key) - ( next $ perform src key (return True) + ( next $ perform src key , do warning "Not known content; skipping" next $ next $ return True @@ -65,19 +68,15 @@ notAnnexed :: FilePath -> CommandStart -> CommandStart notAnnexed src = ifAnnexed src $ giveup $ "cannot used annexed file as src: " ++ src -perform :: FilePath -> Key -> Annex Bool -> CommandPerform -perform src key verify = ifM move +perform :: FilePath -> Key -> CommandPerform +perform src key = ifM move ( next $ cleanup key , error "failed" ) where - move = checkDiskSpaceToGet key False $ - ifM verify - ( do - moveAnnex key src - return True - , return False - ) + move = checkDiskSpaceToGet key False $ do + moveAnnex key src + return True cleanup :: 
Key -> CommandCleanup cleanup key = do diff --git a/doc/git-annex-import.mdwn b/doc/git-annex-import.mdwn index 92519fce6..22b3c3941 100644 --- a/doc/git-annex-import.mdwn +++ b/doc/git-annex-import.mdwn @@ -33,10 +33,9 @@ Several options can be used to adjust handling of duplicate files. Do not delete files from the import location. - This could allow importing the same files repeatedly - to different locations in a repository. More likely, it could be used to - import the same files to a number of different branches or separate git - repositories. + Running with this option repeatedly can import the same files into + different git repositories, or branches, or different locations in a git + repository. * `--deduplicate` @@ -53,6 +52,12 @@ Several options can be used to adjust handling of duplicate files. Does not import any files, but any files found in the import location that are duplicates are deleted. +* `--reinject-duplicates` + + Imports files that are not duplicates. Files that are duplicates have + their content reinjected into the annex (similar to + [[git-annex-reinject]]). + * `--force` Allow existing files to be overwritten by newly imported files. diff --git a/doc/todo/Alternative_mode_control_for_import/comment_4_767dfbaf72de52bd5fbe4c37add5bd91._comment b/doc/todo/Alternative_mode_control_for_import/comment_4_767dfbaf72de52bd5fbe4c37add5bd91._comment new file mode 100644 index 000000000..70c49cf79 --- /dev/null +++ b/doc/todo/Alternative_mode_control_for_import/comment_4_767dfbaf72de52bd5fbe4c37add5bd91._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2017-02-09T19:33:46Z" + content=""" +Actually, import --deduplicate, --skip-duplicates, --clean-duplicates +are implemeted naively and do hash files twice. So it's +the same efficiency.. + +But, I just finished a more complicated implementation that avoids +the second hashing. + +That does make the combined action worth adding, I suppose. 
Done so as +--reinject-duplicates. +"""]] |