summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2013-04-06 16:01:39 -0400
committerGravatar Joey Hess <joey@kitenet.net>2013-04-06 16:07:25 -0400
commitf758f6d5cbef989bff75fcd140edb8e0b8899b84 (patch)
treec7832b9d65a1a0411662f4d3fb6a6f9f1868a065
parentd653a5842f8a070e7d53a6f8fcd7838106efeee7 (diff)
Bugfix: Direct mode no longer repeatedly checksums duplicated files.
Fixed by storing a list of cached inodes for a key, instead of just one. Backwards compatibility note: An old git-annex version will fail to parse an inode cache file that has been written by a new version, and has multiple items. It will succeed if there is just one. So old git-annexes will have even worse behavior when there are duplicated files, if that is possible. I don't think it will be a problem. (Famous last words.) Also, note that it doesn't expire old and unused inode caches for a key. It would be possible to add this if needed; just look through the associated files for a key and if there are more cached inodes, throw out any not corresponding to associated files. Unless a file is being copied repeatedly and the old copy deleted, this lack of expiry should not be a problem.
-rw-r--r--Annex/Content/Direct.hs54
-rw-r--r--Annex/Direct.hs8
-rw-r--r--Assistant/Threads/Committer.hs9
-rw-r--r--Command/Add.hs2
-rw-r--r--debian/changelog6
-rw-r--r--doc/bugs/Direct_mode_keeps_re-checksuming_duplicated_files.mdwn3
6 files changed, 56 insertions, 26 deletions
diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs
index 1f9ddb784..54befdf73 100644
--- a/Annex/Content/Direct.hs
+++ b/Annex/Content/Direct.hs
@@ -12,10 +12,12 @@ module Annex.Content.Direct (
goodContent,
recordedInodeCache,
updateInodeCache,
+ addInodeCache,
writeInodeCache,
compareInodeCaches,
compareInodeCachesWith,
sameInodeCache,
+ elemInodeCaches,
sameFileStatus,
removeInodeCache,
toInodeCache,
@@ -101,21 +103,36 @@ normaliseAssociatedFile file = do
goodContent :: Key -> FilePath -> Annex Bool
goodContent key file = sameInodeCache file =<< recordedInodeCache key
-{- Gets the recorded inode cache for a key. -}
-recordedInodeCache :: Key -> Annex (Maybe InodeCache)
+{- Gets the recorded inode cache for a key.
+ -
+ - A key can be associated with multiple files, so may return more than
+ - one. -}
+recordedInodeCache :: Key -> Annex [InodeCache]
recordedInodeCache key = withInodeCacheFile key $ \f ->
- liftIO $ catchDefaultIO Nothing $ readInodeCache <$> readFile f
+ liftIO $ catchDefaultIO [] $
+ mapMaybe readInodeCache . lines <$> readFile f
-{- Stores a cache of attributes for a file that is associated with a key. -}
+{- Caches an inode for a file.
+ -
+ - Anything else already cached is preserved.
+ -}
updateInodeCache :: Key -> FilePath -> Annex ()
-updateInodeCache key file = maybe noop (writeInodeCache key)
+updateInodeCache key file = maybe noop (addInodeCache key)
=<< liftIO (genInodeCache file)
-{- Writes a cache for a key. -}
-writeInodeCache :: Key -> InodeCache -> Annex ()
-writeInodeCache key cache = withInodeCacheFile key $ \f -> do
+{- Adds another inode to the cache for a key. -}
+addInodeCache :: Key -> InodeCache -> Annex ()
+addInodeCache key cache = do
+ oldcaches <- recordedInodeCache key
+ unlessM (elemInodeCaches cache oldcaches) $
+ writeInodeCache key (cache:oldcaches)
+
+{- Writes inode cache for a key. -}
+writeInodeCache :: Key -> [InodeCache] -> Annex ()
+writeInodeCache key caches = withInodeCacheFile key $ \f -> do
createContentDir f
- liftIO $ writeFile f $ showInodeCache cache
+ liftIO $ writeFile f $
+ unlines $ map showInodeCache caches
{- Removes an inode cache. -}
removeInodeCache :: Key -> Annex ()
@@ -127,12 +144,12 @@ withInodeCacheFile :: Key -> (FilePath -> Annex a) -> Annex a
withInodeCacheFile key a = a =<< calcRepo (gitAnnexInodeCache key)
{- Checks if a InodeCache matches the current version of a file. -}
-sameInodeCache :: FilePath -> Maybe InodeCache -> Annex Bool
-sameInodeCache _ Nothing = return False
-sameInodeCache file (Just old) = go =<< liftIO (genInodeCache file)
+sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool
+sameInodeCache _ [] = return False
+sameInodeCache file old = go =<< liftIO (genInodeCache file)
where
go Nothing = return False
- go (Just curr) = compareInodeCaches curr old
+ go (Just curr) = elemInodeCaches curr old
{- Checks if a FileStatus matches the recorded InodeCache of a file. -}
sameFileStatus :: Key -> FileStatus -> Annex Bool
@@ -140,8 +157,8 @@ sameFileStatus key status = do
old <- recordedInodeCache key
let curr = toInodeCache status
case (old, curr) of
- (Just o, Just c) -> compareInodeCaches o c
- (Nothing, Nothing) -> return True
+ (_, Just c) -> elemInodeCaches c old
+ ([], Nothing) -> return True
_ -> return False
{- If the inodes have changed, only the size and mtime are compared. -}
@@ -153,6 +170,13 @@ compareInodeCaches x y
, return False
)
+elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool
+elemInodeCaches _ [] = return False
+elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l)
+ ( return True
+ , elemInodeCaches c ls
+ )
+
compareInodeCachesWith :: Annex InodeComparisonType
compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly )
diff --git a/Annex/Direct.hs b/Annex/Direct.hs
index e3779adc8..a0388017e 100644
--- a/Annex/Direct.hs
+++ b/Annex/Direct.hs
@@ -52,8 +52,8 @@ stageDirect = do
- it really was. -}
oldcache <- recordedInodeCache key
case oldcache of
- Nothing -> modifiedannexed file key cache
- Just c -> unlessM (compareInodeCaches c cache) $
+ [] -> modifiedannexed file key cache
+ _ -> unlessM (elemInodeCaches cache oldcache) $
modifiedannexed file key cache
(Just key, Nothing, _) -> deletedannexed file key
(Nothing, Nothing, _) -> deletegit file
@@ -87,11 +87,11 @@ addDirect file cache = do
got Nothing = do
showEndFail
return False
- got (Just (key, _)) = ifM (sameInodeCache file $ Just cache)
+ got (Just (key, _)) = ifM (sameInodeCache file [cache])
( do
l <- inRepo $ gitAnnexLink file key
stageSymlink file =<< hashSymlink l
- writeInodeCache key cache
+ addInodeCache key cache
void $ addAssociatedFile key file
logStatus key InfoPresent
showEndOk
diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs
index bee359d59..727b85840 100644
--- a/Assistant/Threads/Committer.hs
+++ b/Assistant/Threads/Committer.hs
@@ -297,13 +297,10 @@ handleAdds delayadd cs = returnWhen (null incomplete) $ do
removedKeysMap ct l = do
mks <- forM (filter isRmChange l) $ \c ->
catKeyFile $ changeFile c
- M.fromList . catMaybes <$> forM (catMaybes mks) mkpair
+ M.fromList . concat <$> mapM mkpairs (catMaybes mks)
where
- mkpair k = do
- mcache <- recordedInodeCache k
- case mcache of
- Just cache -> return $ Just (inodeCacheToKey ct cache, k)
- Nothing -> return Nothing
+ mkpairs k = map (\c -> (inodeCacheToKey ct c, k)) <$>
+ recordedInodeCache k
failedingest = do
liftAnnex showEndFail
diff --git a/Command/Add.hs b/Command/Add.hs
index 30e989e4c..a5dfc1d1c 100644
--- a/Command/Add.hs
+++ b/Command/Add.hs
@@ -132,7 +132,7 @@ ingest (Just source) = do
goindirect Nothing _ = failure
godirect (Just (key, _)) (Just cache) = do
- writeInodeCache key cache
+ addInodeCache key cache
finishIngestDirect key source
return $ Just key
godirect _ _ = failure
diff --git a/debian/changelog b/debian/changelog
index 46f1b4ded..e658848bd 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+git-annex (4.20130406) UNRELEASED; urgency=low
+
+ * Bugfix: Direct mode no longer repeatedly checksums duplicated files.
+
+ -- Joey Hess <joeyh@debian.org> Sat, 06 Apr 2013 15:24:15 -0400
+
git-annex (4.20130405) unstable; urgency=low
* Group subcommands into sections in usage. Closes: #703797
diff --git a/doc/bugs/Direct_mode_keeps_re-checksuming_duplicated_files.mdwn b/doc/bugs/Direct_mode_keeps_re-checksuming_duplicated_files.mdwn
index 845b48a99..123786b65 100644
--- a/doc/bugs/Direct_mode_keeps_re-checksuming_duplicated_files.mdwn
+++ b/doc/bugs/Direct_mode_keeps_re-checksuming_duplicated_files.mdwn
@@ -20,3 +20,6 @@ Secondly, the sync can take quite a while if you have lots of duplicates or a lo
##What version of git-annex are you using? On what operating system?
git-annex version: 4.20130227 on Archlinux
+
+> [[done]]; fixed inode caching code to support multiple files for the
+> same content. --[[Joey]]