From 8c5d75f6fbde080f3e3ad0766b5f3a1d4cee8d81 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 11 Mar 2013 12:56:47 -0400 Subject: detect renames during commit, and .. um, do nothing special because it's lunch time But I'm well set up to fast-track direct mode adds for renames now. --- Annex/Content/Direct.hs | 6 ++++- Assistant/Threads/Committer.hs | 53 +++++++++++++++++++++++++++++++++++-- Utility/InodeCache.hs | 60 +++++++++++++++++++++++++++++++----------- debian/changelog | 2 ++ 4 files changed, 102 insertions(+), 19 deletions(-) diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs index 164cb64a4..bbf6e310d 100644 --- a/Annex/Content/Direct.hs +++ b/Annex/Content/Direct.hs @@ -14,6 +14,7 @@ module Annex.Content.Direct ( updateInodeCache, writeInodeCache, compareInodeCaches, + compareInodeCachesWith, sameInodeCache, sameFileStatus, removeInodeCache, @@ -147,12 +148,15 @@ sameFileStatus key status = do {- If the inodes have changed, only the size and mtime are compared. -} compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool compareInodeCaches x y - | x `compareStrong` y = return True + | compareStrong x y = return True | otherwise = ifM inodesChanged ( return $ compareWeak x y , return False ) +compareInodeCachesWith :: Annex InodeComparisonType +compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly ) + {- Some filesystems get new inodes each time they are mounted. - In order to work on such a filesystem, a sentinal file is used to detect - when the inodes have changed. diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index b336f650b..5be190c4a 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -32,11 +32,15 @@ import Config import Annex.Exception import Annex.Content import Annex.Link +import Annex.CatFile import qualified Annex +import Utility.InodeCache +import Annex.Content.Direct import Data.Time.Clock import Data.Tuple.Utils import qualified Data.Set as S +import qualified Data.Map as M import Data.Either import Control.Concurrent @@ -90,9 +94,13 @@ waitChangeTime a = runEvery (Seconds 1) <~> do {- Did we perhaps only get one of the AddChange and RmChange pair - that make up a rename? -} lonelychange [(PendingAddChange _ _)] = True - lonelychange [(Change { changeInfo = i })] | i == RmChange = True + lonelychange [c] | isRmChange c = True lonelychange _ = False +isRmChange :: Change -> Bool +isRmChange (Change { changeInfo = i }) | i == RmChange = True +isRmChange _ = False + {- An amount of time that is hopefully imperceptably short for humans, - while long enough for a computer to get some work done. - Note that 0.001 is a little too short for rename change batching to @@ -200,7 +208,9 @@ handleAdds delayadd cs = returnWhen (null incomplete) $ do refillChanges postponed returnWhen (null toadd) $ do - added <- catMaybes <$> forM toadd add + added <- catMaybes <$> if direct + then adddirect toadd + else forM toadd add if DirWatcher.eventsCoalesce || null added || direct then return $ added ++ otherchanges else do @@ -238,6 +248,45 @@ handleAdds delayadd cs = returnWhen (null incomplete) $ do ret _ = (True, Nothing) add _ = return Nothing + {- In direct mode, avoid overhead of re-injesting a renamed + - file, by examining the other Changes to see if a removed + - file has the same InodeCache as the new file. If so, + - we can just update bookkeeping, and stage the file in git. + -} + adddirect :: [Change] -> Assistant [Maybe Change] + adddirect toadd = do + ct <- liftAnnex compareInodeCachesWith + m <- liftAnnex $ removedKeysMap ct cs + if M.null m + then forM toadd add + else forM toadd $ \c -> do + mcache <- liftIO $ genInodeCache $ changeFile c + case mcache of + Nothing -> add c + Just cache -> + case M.lookup (inodeCacheToKey ct cache) m of + Nothing -> add c + Just k -> fastadd c k cache + + fastadd :: Change -> Key -> InodeCache -> Assistant (Maybe Change) + fastadd change key cache = do + -- TODO do fast method + debug ["rename detected", show change, show key, show cache] + add change + --return $ Just $ finishedChange change key + + removedKeysMap :: InodeComparisonType -> [Change] -> Annex (M.Map InodeCacheKey Key) + removedKeysMap ct l = do + mks <- forM (filter isRmChange l) $ \c -> + catKeyFile $ changeFile c + M.fromList . catMaybes <$> forM (catMaybes mks) mkpair + where + mkpair k = do + mcache <- recordedInodeCache k + case mcache of + Just cache -> return $ Just (inodeCacheToKey ct cache, k) + Nothing -> return Nothing + failedingest = do liftAnnex showEndFail return Nothing diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs index 1d3f17887..e08abc6ad 100644 --- a/Utility/InodeCache.hs +++ b/Utility/InodeCache.hs @@ -11,22 +11,46 @@ import Common import System.Posix.Types import Utility.QuickCheck -data InodeCache = InodeCache FileID FileOffset EpochTime +data InodeCachePrim = InodeCachePrim FileID FileOffset EpochTime + deriving (Show, Eq, Ord) + +newtype InodeCache = InodeCache InodeCachePrim deriving (Show) +{- Inode caches can be compared in two different ways, either weakly + - or strongly. -} +data InodeComparisonType = Weakly | Strongly + deriving (Eq, Ord) + +{- Strong comparison, including inodes. -} compareStrong :: InodeCache -> InodeCache -> Bool -compareStrong (InodeCache inode1 size1 mtime1) (InodeCache inode2 size2 mtime2) = - inode1 == inode2 && size1 == size2 && mtime1 == mtime2 +compareStrong (InodeCache x) (InodeCache y) = x == y -{- Weak comparison of the inode caches, comparing the size and mtime, but - - not the actual inode. Useful when inodes have changed, perhaps +{- Weak comparison of the inode caches, comparing the size and mtime, + - but not the actual inode. Useful when inodes have changed, perhaps - due to some filesystems being remounted. -} compareWeak :: InodeCache -> InodeCache -> Bool -compareWeak (InodeCache _ size1 mtime1) (InodeCache _ size2 mtime2) = +compareWeak (InodeCache (InodeCachePrim _ size1 mtime1)) (InodeCache (InodeCachePrim _ size2 mtime2)) = size1 == size2 && mtime1 == mtime2 +compareBy :: InodeComparisonType -> InodeCache -> InodeCache -> Bool +compareBy Strongly = compareStrong +compareBy Weakly = compareWeak + +{- For use in a Map; it's determined at creation time whether this + - uses strong or weak comparison for Eq. -} +data InodeCacheKey = InodeCacheKey InodeComparisonType InodeCachePrim + deriving (Ord) + +instance Eq InodeCacheKey where + (InodeCacheKey ctx x) == (InodeCacheKey cty y) = + compareBy (maximum [ctx,cty]) (InodeCache x ) (InodeCache y) + +inodeCacheToKey :: InodeComparisonType -> InodeCache -> InodeCacheKey +inodeCacheToKey ct (InodeCache prim) = InodeCacheKey ct prim + showInodeCache :: InodeCache -> String -showInodeCache (InodeCache inode size mtime) = unwords +showInodeCache (InodeCache (InodeCachePrim inode size mtime)) = unwords [ show inode , show size , show mtime @@ -34,10 +58,12 @@ showInodeCache (InodeCache inode size mtime) = unwords readInodeCache :: String -> Maybe InodeCache readInodeCache s = case words s of - (inode:size:mtime:_) -> InodeCache - <$> readish inode - <*> readish size - <*> readish mtime + (inode:size:mtime:_) -> + let prim = InodeCachePrim + <$> readish inode + <*> readish size + <*> readish mtime + in InodeCache <$> prim _ -> Nothing genInodeCache :: FilePath -> IO (Maybe InodeCache) @@ -45,17 +71,19 @@ genInodeCache f = catchDefaultIO Nothing $ toInodeCache <$> getFileStatus f toInodeCache :: FileStatus -> Maybe InodeCache toInodeCache s - | isRegularFile s = Just $ InodeCache + | isRegularFile s = Just $ InodeCache $ InodeCachePrim (fileID s) (fileSize s) (modificationTime s) | otherwise = Nothing instance Arbitrary InodeCache where - arbitrary = InodeCache - <$> arbitrary - <*> arbitrary - <*> arbitrary + arbitrary = + let prim = InodeCachePrim + <$> arbitrary + <*> arbitrary + <*> arbitrary + in InodeCache <$> prim prop_read_show_inodecache :: InodeCache -> Bool prop_read_show_inodecache c = case readInodeCache (showInodeCache c) of diff --git a/debian/changelog b/debian/changelog index f8ff2d282..44d3bf68d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -50,6 +50,8 @@ git-annex (4.20130228) UNRELEASED; urgency=low status of that directory, rather than the whole annex. * Added remote..annex-gnupg-options setting. Thanks, guilhem for the patch. + * assistant: Optimised handling of renamed files in direct mode, + avoiding re-checksumming. -- Joey Hess Wed, 27 Feb 2013 23:20:40 -0400 -- cgit v1.2.3