summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Annex/Ingest.hs31
-rw-r--r--Assistant/Threads/Committer.hs2
-rw-r--r--CHANGELOG3
-rw-r--r--Command/Import.hs93
-rw-r--r--Command/Smudge.hs2
5 files changed, 90 insertions, 41 deletions
diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs
index c120f1a4d..5f6e38ff2 100644
--- a/Annex/Ingest.hs
+++ b/Annex/Ingest.hs
@@ -1,6 +1,6 @@
{- git-annex content ingestion
-
- - Copyright 2010-2016 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -10,6 +10,7 @@ module Annex.Ingest (
LockDownConfig(..),
lockDown,
ingestAdd,
+ ingestAdd',
ingest,
ingest',
finishIngestDirect,
@@ -116,10 +117,13 @@ lockDown' cfg file = ifM (pure (not (hardlinkFileTmp cfg)) <||> crippledFileSyst
{- Ingests a locked down file into the annex. Updates the work tree and
- index. -}
ingestAdd :: Maybe LockedDown -> Annex (Maybe Key)
-ingestAdd Nothing = return Nothing
-ingestAdd ld@(Just (LockedDown cfg source)) = do
- (mk, mic) <- ingest ld
- case mk of
+ingestAdd ld = ingestAdd' ld Nothing
+
+ingestAdd' :: Maybe LockedDown -> Maybe Key -> Annex (Maybe Key)
+ingestAdd' Nothing _ = return Nothing
+ingestAdd' ld@(Just (LockedDown cfg source)) mk = do
+ (mk', mic) <- ingest ld mk
+ case mk' of
Nothing -> return Nothing
Just k -> do
let f = keyFilename source
@@ -140,14 +144,17 @@ ingestAdd ld@(Just (LockedDown cfg source)) = do
{- Ingests a locked down file into the annex. Does not update the working
- tree or the index.
-}
-ingest :: Maybe LockedDown -> Annex (Maybe Key, Maybe InodeCache)
+ingest :: Maybe LockedDown -> Maybe Key -> Annex (Maybe Key, Maybe InodeCache)
ingest = ingest' Nothing
-ingest' :: Maybe Backend -> Maybe LockedDown -> Annex (Maybe Key, Maybe InodeCache)
-ingest' _ Nothing = return (Nothing, Nothing)
-ingest' preferredbackend (Just (LockedDown cfg source)) = withTSDelta $ \delta -> do
- backend <- maybe (chooseBackend $ keyFilename source) (return . Just) preferredbackend
- k <- genKey source backend
+ingest' :: Maybe Backend -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key, Maybe InodeCache)
+ingest' _ Nothing _ = return (Nothing, Nothing)
+ingest' preferredbackend (Just (LockedDown cfg source)) mk = withTSDelta $ \delta -> do
+ k <- case mk of
+ Nothing -> do
+ backend <- maybe (chooseBackend $ keyFilename source) (return . Just) preferredbackend
+ fmap fst <$> genKey source backend
+ Just k -> return (Just k)
let src = contentLocation source
ms <- liftIO $ catchMaybeIO $ getFileStatus src
mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms
@@ -156,7 +163,7 @@ ingest' preferredbackend (Just (LockedDown cfg source)) = withTSDelta $ \delta -
(Just newc, Just c) | compareStrong c newc -> go k mcache ms
_ -> failure "changed while it was being added"
where
- go (Just (key, _)) mcache (Just s)
+ go (Just key) mcache (Just s)
| lockingFile cfg = golocked key mcache s
| otherwise = ifM isDirect
( godirect key mcache s
diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs
index 7b366bc0a..dbd030b33 100644
--- a/Assistant/Threads/Committer.hs
+++ b/Assistant/Threads/Committer.hs
@@ -322,7 +322,7 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do
doadd = sanitycheck ks $ do
(mkey, mcache) <- liftAnnex $ do
showStart "add" $ keyFilename ks
- ingest $ Just $ LockedDown lockdownconfig ks
+ ingest (Just $ LockedDown lockdownconfig ks) Nothing
maybe (failedingest change) (done change mcache $ keyFilename ks) mkey
add _ _ = return Nothing
diff --git a/CHANGELOG b/CHANGELOG
index 45ed65a66..e348014a6 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -43,6 +43,9 @@ git-annex (6.20170102) UNRELEASED; urgency=medium
Note that --clean-duplicates and --deduplicate still check
numcopies, so won't delete duplicate files unless there's an annexed
copy.
+ * import: --deduplicate and --skip-duplicates were implemented
+ inneficiently; they unncessarily hashed each file twice. They have
+ been improved to only hash once.
-- Joey Hess <id@joeyh.name> Fri, 06 Jan 2017 15:22:06 -0400
diff --git a/Command/Import.hs b/Command/Import.hs
index 4b675475d..ea2ec71e4 100644
--- a/Command/Import.hs
+++ b/Command/Import.hs
@@ -1,6 +1,6 @@
{- git-annex command
-
- - Copyright 2012-2013 Joey Hess <id@joeyh.name>
+ - Copyright 2012-2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -13,11 +13,13 @@ import qualified Annex
import qualified Command.Add
import Utility.CopyFile
import Backend
-import Remote
import Types.KeySource
import Annex.CheckIgnore
import Annex.NumCopies
import Annex.FileMatcher
+import Annex.Ingest
+import Annex.InodeSentinal
+import Utility.InodeCache
import Logs.Location
cmd :: Command
@@ -71,12 +73,8 @@ start :: GetFileMatcher -> DuplicateMode -> (FilePath, FilePath) -> CommandStart
start largematcher mode (srcfile, destfile) =
ifM (liftIO $ isRegularFile <$> getSymbolicLinkStatus srcfile)
( do
- ma <- pickaction
- case ma of
- Nothing -> stop
- Just a -> do
- showStart "import" destfile
- next a
+ showStart "import" destfile
+ next pickaction
, stop
)
where
@@ -90,7 +88,7 @@ start largematcher mode (srcfile, destfile) =
warning "Could not verify that the content is still present in the annex; not removing from the import location."
stop
)
- importfile = checkdestdir $ do
+ importfile ld k = checkdestdir $ do
ignored <- not <$> Annex.getState Annex.force <&&> checkIgnored destfile
if ignored
then do
@@ -99,14 +97,14 @@ start largematcher mode (srcfile, destfile) =
else do
existing <- liftIO (catchMaybeIO $ getSymbolicLinkStatus destfile)
case existing of
- Nothing -> importfilechecked
+ Nothing -> importfilechecked ld k
Just s
| isDirectory s -> notoverwriting "(is a directory)"
| isSymbolicLink s -> notoverwriting "(is a symlink)"
| otherwise -> ifM (Annex.getState Annex.force)
( do
liftIO $ nukeFile destfile
- importfilechecked
+ importfilechecked ld k
, notoverwriting "(use --force to override, or a duplication option such as --deduplicate to clean up)"
)
checkdestdir cont = do
@@ -120,33 +118,74 @@ start largematcher mode (srcfile, destfile) =
warning $ "not importing " ++ destfile ++ " because " ++ destdir ++ " is not a directory"
stop
- importfilechecked = do
+ importfilechecked ld k = do
+ -- Move or copy the src file to the dest file.
+ -- The dest file is what will be ingested.
liftIO $ createDirectoryIfMissing True (parentDir destfile)
liftIO $ if mode == Duplicate || mode == SkipDuplicates
then void $ copyFileExternal CopyAllMetaData srcfile destfile
else moveFile srcfile destfile
+ -- Get the inode cache of the dest file. It should be
+ -- weakly the same as the origianlly locked down file's
+ -- inode cache. (Since the file may have been copied,
+ -- its inodes may not be the same.)
+ newcache <- withTSDelta $ liftIO . genInodeCache destfile
+ let unchanged = case (newcache, inodeCache (keySource ld)) of
+ (_, Nothing) -> True
+ (Just newc, Just c) | compareWeak c newc -> True
+ _ -> False
+ unless unchanged $
+ giveup "changed while it was being added"
+ -- The LockedDown needs to be adjusted, since the destfile
+ -- is what will be ingested.
+ let ld' = ld
+ { keySource = KeySource
+ { keyFilename = destfile
+ , contentLocation = destfile
+ , inodeCache = newcache
+ }
+ }
ifM (checkFileMatcher largematcher destfile)
- ( Command.Add.perform destfile
+ ( ingestAdd' (Just ld') (Just k)
+ >>= maybe
+ stop
+ (\addedk -> next $ Command.Add.cleanup addedk True)
, next $ Command.Add.addSmall destfile
)
notoverwriting why = do
warning $ "not overwriting existing " ++ destfile ++ " " ++ why
stop
- checkdup dupa notdupa = do
- backend <- chooseBackend destfile
- let ks = KeySource srcfile srcfile Nothing
- v <- genKey ks backend
+ lockdown a = do
+ lockingfile <- not <$> addUnlocked
+ -- Minimal lock down with no hard linking so nothing
+ -- has to be done to clean up from it.
+ let cfg = LockDownConfig
+ { lockingFile = lockingfile
+ , hardlinkFileTmp = False
+ }
+ v <- lockDown cfg srcfile
case v of
- Just (k, _) -> ifM (isKnownKey k)
- ( return (maybe Nothing (\a -> Just (a k)) dupa)
- , return notdupa
- )
- _ -> return notdupa
- pickaction = case mode of
- DeDuplicate -> checkdup (Just deletedup) (Just importfile)
- CleanDuplicates -> checkdup (Just deletedup) Nothing
- SkipDuplicates -> checkdup Nothing (Just importfile)
- _ -> return (Just importfile)
+ Just ld -> do
+ backend <- chooseBackend destfile
+ v' <- genKey (keySource ld) backend
+ case v' of
+ Just (k, _) -> a (ld, k)
+ Nothing -> giveup "failed to generate a key"
+ Nothing -> stop
+ checkdup k dupa notdupa = ifM (isKnownKey k)
+ ( dupa
+ , notdupa
+ )
+ pickaction = lockdown $ \(ld, k) -> case mode of
+ DeDuplicate -> checkdup k (deletedup k) (importfile ld k)
+ CleanDuplicates -> checkdup k
+ (deletedup k)
+ (skipbecause "not duplicate")
+ SkipDuplicates -> checkdup k
+ (skipbecause "duplicate")
+ (importfile ld k)
+ _ -> importfile ld k
+ skipbecause s = showNote (s ++ "; skipping") >> next (return True)
verifyExisting :: Key -> FilePath -> (CommandPerform, CommandPerform) -> CommandPerform
verifyExisting key destfile (yes, no) = do
diff --git a/Command/Smudge.hs b/Command/Smudge.hs
index 5a4b879dd..cf5272f82 100644
--- a/Command/Smudge.hs
+++ b/Command/Smudge.hs
@@ -88,7 +88,7 @@ clean file = do
<$> catKeyFile file
liftIO . emitPointer
=<< go
- =<< ingest' currbackend
+ =<< (\ld -> ingest' currbackend ld Nothing)
=<< lockDown cfg file
, liftIO $ B.hPut stdout b
)