diff options
-rw-r--r-- | Annex/Ingest.hs | 31 | ||||
-rw-r--r-- | Assistant/Threads/Committer.hs | 2 | ||||
-rw-r--r-- | CHANGELOG | 3 | ||||
-rw-r--r-- | Command/Import.hs | 93 | ||||
-rw-r--r-- | Command/Smudge.hs | 2 |
5 files changed, 90 insertions, 41 deletions
diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs index c120f1a4d..5f6e38ff2 100644 --- a/Annex/Ingest.hs +++ b/Annex/Ingest.hs @@ -1,6 +1,6 @@ {- git-annex content ingestion - - - Copyright 2010-2016 Joey Hess <id@joeyh.name> + - Copyright 2010-2017 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -10,6 +10,7 @@ module Annex.Ingest ( LockDownConfig(..), lockDown, ingestAdd, + ingestAdd', ingest, ingest', finishIngestDirect, @@ -116,10 +117,13 @@ lockDown' cfg file = ifM (pure (not (hardlinkFileTmp cfg)) <||> crippledFileSyst {- Ingests a locked down file into the annex. Updates the work tree and - index. -} ingestAdd :: Maybe LockedDown -> Annex (Maybe Key) -ingestAdd Nothing = return Nothing -ingestAdd ld@(Just (LockedDown cfg source)) = do - (mk, mic) <- ingest ld - case mk of +ingestAdd ld = ingestAdd' ld Nothing + +ingestAdd' :: Maybe LockedDown -> Maybe Key -> Annex (Maybe Key) +ingestAdd' Nothing _ = return Nothing +ingestAdd' ld@(Just (LockedDown cfg source)) mk = do + (mk', mic) <- ingest ld mk + case mk' of Nothing -> return Nothing Just k -> do let f = keyFilename source @@ -140,14 +144,17 @@ ingestAdd ld@(Just (LockedDown cfg source)) = do {- Ingests a locked down file into the annex. Does not update the working - tree or the index. -} -ingest :: Maybe LockedDown -> Annex (Maybe Key, Maybe InodeCache) +ingest :: Maybe LockedDown -> Maybe Key -> Annex (Maybe Key, Maybe InodeCache) ingest = ingest' Nothing -ingest' :: Maybe Backend -> Maybe LockedDown -> Annex (Maybe Key, Maybe InodeCache) -ingest' _ Nothing = return (Nothing, Nothing) -ingest' preferredbackend (Just (LockedDown cfg source)) = withTSDelta $ \delta -> do - backend <- maybe (chooseBackend $ keyFilename source) (return . Just) preferredbackend - k <- genKey source backend +ingest' :: Maybe Backend -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key, Maybe InodeCache) +ingest' _ Nothing _ = return (Nothing, Nothing) +ingest' preferredbackend (Just (LockedDown cfg source)) mk = withTSDelta $ \delta -> do + k <- case mk of + Nothing -> do + backend <- maybe (chooseBackend $ keyFilename source) (return . Just) preferredbackend + fmap fst <$> genKey source backend + Just k -> return (Just k) let src = contentLocation source ms <- liftIO $ catchMaybeIO $ getFileStatus src mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms @@ -156,7 +163,7 @@ ingest' preferredbackend (Just (LockedDown cfg source)) = withTSDelta $ \delta - (Just newc, Just c) | compareStrong c newc -> go k mcache ms _ -> failure "changed while it was being added" where - go (Just (key, _)) mcache (Just s) + go (Just key) mcache (Just s) | lockingFile cfg = golocked key mcache s | otherwise = ifM isDirect ( godirect key mcache s diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index 7b366bc0a..dbd030b33 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -322,7 +322,7 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do doadd = sanitycheck ks $ do (mkey, mcache) <- liftAnnex $ do showStart "add" $ keyFilename ks - ingest $ Just $ LockedDown lockdownconfig ks + ingest (Just $ LockedDown lockdownconfig ks) Nothing maybe (failedingest change) (done change mcache $ keyFilename ks) mkey add _ _ = return Nothing @@ -43,6 +43,9 @@ git-annex (6.20170102) UNRELEASED; urgency=medium Note that --clean-duplicates and --deduplicate still check numcopies, so won't delete duplicate files unless there's an annexed copy. + * import: --deduplicate and --skip-duplicates were implemented + inneficiently; they unncessarily hashed each file twice. They have + been improved to only hash once. -- Joey Hess <id@joeyh.name> Fri, 06 Jan 2017 15:22:06 -0400 diff --git a/Command/Import.hs b/Command/Import.hs index 4b675475d..ea2ec71e4 100644 --- a/Command/Import.hs +++ b/Command/Import.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2012-2013 Joey Hess <id@joeyh.name> + - Copyright 2012-2017 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -13,11 +13,13 @@ import qualified Annex import qualified Command.Add import Utility.CopyFile import Backend -import Remote import Types.KeySource import Annex.CheckIgnore import Annex.NumCopies import Annex.FileMatcher +import Annex.Ingest +import Annex.InodeSentinal +import Utility.InodeCache import Logs.Location cmd :: Command @@ -71,12 +73,8 @@ start :: GetFileMatcher -> DuplicateMode -> (FilePath, FilePath) -> CommandStart start largematcher mode (srcfile, destfile) = ifM (liftIO $ isRegularFile <$> getSymbolicLinkStatus srcfile) ( do - ma <- pickaction - case ma of - Nothing -> stop - Just a -> do - showStart "import" destfile - next a + showStart "import" destfile + next pickaction , stop ) where @@ -90,7 +88,7 @@ start largematcher mode (srcfile, destfile) = warning "Could not verify that the content is still present in the annex; not removing from the import location." stop ) - importfile = checkdestdir $ do + importfile ld k = checkdestdir $ do ignored <- not <$> Annex.getState Annex.force <&&> checkIgnored destfile if ignored then do @@ -99,14 +97,14 @@ start largematcher mode (srcfile, destfile) = else do existing <- liftIO (catchMaybeIO $ getSymbolicLinkStatus destfile) case existing of - Nothing -> importfilechecked + Nothing -> importfilechecked ld k Just s | isDirectory s -> notoverwriting "(is a directory)" | isSymbolicLink s -> notoverwriting "(is a symlink)" | otherwise -> ifM (Annex.getState Annex.force) ( do liftIO $ nukeFile destfile - importfilechecked + importfilechecked ld k , notoverwriting "(use --force to override, or a duplication option such as --deduplicate to clean up)" ) checkdestdir cont = do @@ -120,33 +118,74 @@ start largematcher mode (srcfile, destfile) = warning $ "not importing " ++ destfile ++ " because " ++ destdir ++ " is not a directory" stop - importfilechecked = do + importfilechecked ld k = do + -- Move or copy the src file to the dest file. + -- The dest file is what will be ingested. liftIO $ createDirectoryIfMissing True (parentDir destfile) liftIO $ if mode == Duplicate || mode == SkipDuplicates then void $ copyFileExternal CopyAllMetaData srcfile destfile else moveFile srcfile destfile + -- Get the inode cache of the dest file. It should be + -- weakly the same as the origianlly locked down file's + -- inode cache. (Since the file may have been copied, + -- its inodes may not be the same.) + newcache <- withTSDelta $ liftIO . genInodeCache destfile + let unchanged = case (newcache, inodeCache (keySource ld)) of + (_, Nothing) -> True + (Just newc, Just c) | compareWeak c newc -> True + _ -> False + unless unchanged $ + giveup "changed while it was being added" + -- The LockedDown needs to be adjusted, since the destfile + -- is what will be ingested. + let ld' = ld + { keySource = KeySource + { keyFilename = destfile + , contentLocation = destfile + , inodeCache = newcache + } + } ifM (checkFileMatcher largematcher destfile) - ( Command.Add.perform destfile + ( ingestAdd' (Just ld') (Just k) + >>= maybe + stop + (\addedk -> next $ Command.Add.cleanup addedk True) , next $ Command.Add.addSmall destfile ) notoverwriting why = do warning $ "not overwriting existing " ++ destfile ++ " " ++ why stop - checkdup dupa notdupa = do - backend <- chooseBackend destfile - let ks = KeySource srcfile srcfile Nothing - v <- genKey ks backend + lockdown a = do + lockingfile <- not <$> addUnlocked + -- Minimal lock down with no hard linking so nothing + -- has to be done to clean up from it. + let cfg = LockDownConfig + { lockingFile = lockingfile + , hardlinkFileTmp = False + } + v <- lockDown cfg srcfile case v of - Just (k, _) -> ifM (isKnownKey k) - ( return (maybe Nothing (\a -> Just (a k)) dupa) - , return notdupa - ) - _ -> return notdupa - pickaction = case mode of - DeDuplicate -> checkdup (Just deletedup) (Just importfile) - CleanDuplicates -> checkdup (Just deletedup) Nothing - SkipDuplicates -> checkdup Nothing (Just importfile) - _ -> return (Just importfile) + Just ld -> do + backend <- chooseBackend destfile + v' <- genKey (keySource ld) backend + case v' of + Just (k, _) -> a (ld, k) + Nothing -> giveup "failed to generate a key" + Nothing -> stop + checkdup k dupa notdupa = ifM (isKnownKey k) + ( dupa + , notdupa + ) + pickaction = lockdown $ \(ld, k) -> case mode of + DeDuplicate -> checkdup k (deletedup k) (importfile ld k) + CleanDuplicates -> checkdup k + (deletedup k) + (skipbecause "not duplicate") + SkipDuplicates -> checkdup k + (skipbecause "duplicate") + (importfile ld k) + _ -> importfile ld k + skipbecause s = showNote (s ++ "; skipping") >> next (return True) verifyExisting :: Key -> FilePath -> (CommandPerform, CommandPerform) -> CommandPerform verifyExisting key destfile (yes, no) = do diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 5a4b879dd..cf5272f82 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -88,7 +88,7 @@ clean file = do <$> catKeyFile file liftIO . emitPointer =<< go - =<< ingest' currbackend + =<< (\ld -> ingest' currbackend ld Nothing) =<< lockDown cfg file , liftIO $ B.hPut stdout b ) |