From f758f6d5cbef989bff75fcd140edb8e0b8899b84 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 6 Apr 2013 16:01:39 -0400 Subject: Bugfix: Direct mode no longer repeatedly checksums duplicated files. Fixed by storing a list of cached inodes for a key, instead of just one. Backwards compatibility note: An old git-annex version will fail to parse an inode cache file that has been written by a new version, and has multiple items. It will succeed if there is just one. So old git-annexes will have even worse behavior when there are duplicated files, if that is possible. I don't think it will be a problem. (Famous last words.) Also, note that it doesn't expire old and unused inode caches for a key. It would be possible to add this if needed; just look through the associated files for a key and if there are more cached inodes, throw out any not corresponding to associated files. Unless a file is being copied repeatedly and the old copy deleted, this lack of expiry should not be a problem. --- Annex/Content/Direct.hs | 54 ++++++++++++++++------ Annex/Direct.hs | 8 ++-- Assistant/Threads/Committer.hs | 9 ++-- Command/Add.hs | 2 +- debian/changelog | 6 +++ ...mode_keeps_re-checksuming_duplicated_files.mdwn | 3 ++ 6 files changed, 56 insertions(+), 26 deletions(-) diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs index 1f9ddb784..54befdf73 100644 --- a/Annex/Content/Direct.hs +++ b/Annex/Content/Direct.hs @@ -12,10 +12,12 @@ module Annex.Content.Direct ( goodContent, recordedInodeCache, updateInodeCache, + addInodeCache, writeInodeCache, compareInodeCaches, compareInodeCachesWith, sameInodeCache, + elemInodeCaches, sameFileStatus, removeInodeCache, toInodeCache, @@ -101,21 +103,36 @@ normaliseAssociatedFile file = do goodContent :: Key -> FilePath -> Annex Bool goodContent key file = sameInodeCache file =<< recordedInodeCache key -{- Gets the recorded inode cache for a key. 
-} -recordedInodeCache :: Key -> Annex (Maybe InodeCache) +{- Gets the recorded inode cache for a key. + - + - A key can be associated with multiple files, so may return more than + - one. -} +recordedInodeCache :: Key -> Annex [InodeCache] recordedInodeCache key = withInodeCacheFile key $ \f -> - liftIO $ catchDefaultIO Nothing $ readInodeCache <$> readFile f + liftIO $ catchDefaultIO [] $ + mapMaybe readInodeCache . lines <$> readFile f -{- Stores a cache of attributes for a file that is associated with a key. -} +{- Caches an inode for a file. + - + - Anything else already cached is preserved. + -} updateInodeCache :: Key -> FilePath -> Annex () -updateInodeCache key file = maybe noop (writeInodeCache key) +updateInodeCache key file = maybe noop (addInodeCache key) =<< liftIO (genInodeCache file) -{- Writes a cache for a key. -} -writeInodeCache :: Key -> InodeCache -> Annex () -writeInodeCache key cache = withInodeCacheFile key $ \f -> do +{- Adds another inode to the cache for a key. -} +addInodeCache :: Key -> InodeCache -> Annex () +addInodeCache key cache = do + oldcaches <- recordedInodeCache key + unlessM (elemInodeCaches cache oldcaches) $ + writeInodeCache key (cache:oldcaches) + +{- Writes inode cache for a key. -} +writeInodeCache :: Key -> [InodeCache] -> Annex () +writeInodeCache key caches = withInodeCacheFile key $ \f -> do createContentDir f - liftIO $ writeFile f $ showInodeCache cache + liftIO $ writeFile f $ + unlines $ map showInodeCache caches {- Removes an inode cache. -} removeInodeCache :: Key -> Annex () @@ -127,12 +144,12 @@ withInodeCacheFile :: Key -> (FilePath -> Annex a) -> Annex a withInodeCacheFile key a = a =<< calcRepo (gitAnnexInodeCache key) {- Checks if a InodeCache matches the current version of a file. 
-} -sameInodeCache :: FilePath -> Maybe InodeCache -> Annex Bool -sameInodeCache _ Nothing = return False -sameInodeCache file (Just old) = go =<< liftIO (genInodeCache file) +sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool +sameInodeCache _ [] = return False +sameInodeCache file old = go =<< liftIO (genInodeCache file) where go Nothing = return False - go (Just curr) = compareInodeCaches curr old + go (Just curr) = elemInodeCaches curr old {- Checks if a FileStatus matches the recorded InodeCache of a file. -} sameFileStatus :: Key -> FileStatus -> Annex Bool @@ -140,8 +157,8 @@ sameFileStatus key status = do old <- recordedInodeCache key let curr = toInodeCache status case (old, curr) of - (Just o, Just c) -> compareInodeCaches o c - (Nothing, Nothing) -> return True + (_, Just c) -> elemInodeCaches c old + ([], Nothing) -> return True _ -> return False {- If the inodes have changed, only the size and mtime are compared. -} @@ -153,6 +170,13 @@ compareInodeCaches x y , return False ) +elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool +elemInodeCaches _ [] = return False +elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l) + ( return True + , elemInodeCaches c ls + ) + compareInodeCachesWith :: Annex InodeComparisonType compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly ) diff --git a/Annex/Direct.hs b/Annex/Direct.hs index e3779adc8..a0388017e 100644 --- a/Annex/Direct.hs +++ b/Annex/Direct.hs @@ -52,8 +52,8 @@ stageDirect = do - it really was. 
-} oldcache <- recordedInodeCache key case oldcache of - Nothing -> modifiedannexed file key cache - Just c -> unlessM (compareInodeCaches c cache) $ + [] -> modifiedannexed file key cache + _ -> unlessM (elemInodeCaches cache oldcache) $ modifiedannexed file key cache (Just key, Nothing, _) -> deletedannexed file key (Nothing, Nothing, _) -> deletegit file @@ -87,11 +87,11 @@ addDirect file cache = do got Nothing = do showEndFail return False - got (Just (key, _)) = ifM (sameInodeCache file $ Just cache) + got (Just (key, _)) = ifM (sameInodeCache file [cache]) ( do l <- inRepo $ gitAnnexLink file key stageSymlink file =<< hashSymlink l - writeInodeCache key cache + addInodeCache key cache void $ addAssociatedFile key file logStatus key InfoPresent showEndOk diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index bee359d59..727b85840 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -297,13 +297,10 @@ handleAdds delayadd cs = returnWhen (null incomplete) $ do removedKeysMap ct l = do mks <- forM (filter isRmChange l) $ \c -> catKeyFile $ changeFile c - M.fromList . catMaybes <$> forM (catMaybes mks) mkpair + M.fromList . 
concat <$> mapM mkpairs (catMaybes mks) where - mkpair k = do - mcache <- recordedInodeCache k - case mcache of - Just cache -> return $ Just (inodeCacheToKey ct cache, k) - Nothing -> return Nothing + mkpairs k = map (\c -> (inodeCacheToKey ct c, k)) <$> + recordedInodeCache k failedingest = do liftAnnex showEndFail diff --git a/Command/Add.hs b/Command/Add.hs index 30e989e4c..a5dfc1d1c 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -132,7 +132,7 @@ ingest (Just source) = do goindirect Nothing _ = failure godirect (Just (key, _)) (Just cache) = do - writeInodeCache key cache + addInodeCache key cache finishIngestDirect key source return $ Just key godirect _ _ = failure diff --git a/debian/changelog b/debian/changelog index 46f1b4ded..e658848bd 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +git-annex (4.20130406) UNRELEASED; urgency=low + + * Bugfix: Direct mode no longer repeatedly checksums duplicated files. + + -- Joey Hess Sat, 06 Apr 2013 15:24:15 -0400 + git-annex (4.20130405) unstable; urgency=low * Group subcommands into sections in usage. Closes: #703797 diff --git a/doc/bugs/Direct_mode_keeps_re-checksuming_duplicated_files.mdwn b/doc/bugs/Direct_mode_keeps_re-checksuming_duplicated_files.mdwn index 845b48a99..123786b65 100644 --- a/doc/bugs/Direct_mode_keeps_re-checksuming_duplicated_files.mdwn +++ b/doc/bugs/Direct_mode_keeps_re-checksuming_duplicated_files.mdwn @@ -20,3 +20,6 @@ Secondly, the sync can take quite a while if you have lots of duplicates or a lo ##What version of git-annex are you using? On what operating system? git-annex version: 4.20130227 on Archlinux + +> [[done]]; fixed inode caching code to support multiple files for the +> same content. --[[Joey]] -- cgit v1.2.3