summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2013-03-11 12:56:47 -0400
committerGravatar Joey Hess <joey@kitenet.net>2013-03-11 12:56:47 -0400
commit8c5d75f6fbde080f3e3ad0766b5f3a1d4cee8d81 (patch)
treea74c92d131733dc0df454f71c6372edacc745aa8
parentde5c24518b878322cd883ab4d4cfb46d07dbdce3 (diff)
detect renames during commit, and .. um, do nothing special because it's lunch time
But I'm well set up to fast-track direct mode adds for renames now.
-rw-r--r--Annex/Content/Direct.hs6
-rw-r--r--Assistant/Threads/Committer.hs53
-rw-r--r--Utility/InodeCache.hs60
-rw-r--r--debian/changelog2
4 files changed, 102 insertions, 19 deletions
diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs
index 164cb64a4..bbf6e310d 100644
--- a/Annex/Content/Direct.hs
+++ b/Annex/Content/Direct.hs
@@ -14,6 +14,7 @@ module Annex.Content.Direct (
updateInodeCache,
writeInodeCache,
compareInodeCaches,
+ compareInodeCachesWith,
sameInodeCache,
sameFileStatus,
removeInodeCache,
@@ -147,12 +148,15 @@ sameFileStatus key status = do
{- If the inodes have changed, only the size and mtime are compared. -}
compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool
compareInodeCaches x y
- | x `compareStrong` y = return True
+ | compareStrong x y = return True
| otherwise = ifM inodesChanged
( return $ compareWeak x y
, return False
)
+compareInodeCachesWith :: Annex InodeComparisonType
+compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly )
+
{- Some filesystems get new inodes each time they are mounted.
- In order to work on such a filesystem, a sentinel file is used to detect
- when the inodes have changed.
diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs
index b336f650b..5be190c4a 100644
--- a/Assistant/Threads/Committer.hs
+++ b/Assistant/Threads/Committer.hs
@@ -32,11 +32,15 @@ import Config
import Annex.Exception
import Annex.Content
import Annex.Link
+import Annex.CatFile
import qualified Annex
+import Utility.InodeCache
+import Annex.Content.Direct
import Data.Time.Clock
import Data.Tuple.Utils
import qualified Data.Set as S
+import qualified Data.Map as M
import Data.Either
import Control.Concurrent
@@ -90,9 +94,13 @@ waitChangeTime a = runEvery (Seconds 1) <~> do
{- Did we perhaps only get one of the AddChange and RmChange pair
- that make up a rename? -}
lonelychange [(PendingAddChange _ _)] = True
- lonelychange [(Change { changeInfo = i })] | i == RmChange = True
+ lonelychange [c] | isRmChange c = True
lonelychange _ = False
+isRmChange :: Change -> Bool
+isRmChange (Change { changeInfo = i }) | i == RmChange = True
+isRmChange _ = False
+
{- An amount of time that is hopefully imperceptibly short for humans,
- while long enough for a computer to get some work done.
- Note that 0.001 is a little too short for rename change batching to
@@ -200,7 +208,9 @@ handleAdds delayadd cs = returnWhen (null incomplete) $ do
refillChanges postponed
returnWhen (null toadd) $ do
- added <- catMaybes <$> forM toadd add
+ added <- catMaybes <$> if direct
+ then adddirect toadd
+ else forM toadd add
if DirWatcher.eventsCoalesce || null added || direct
then return $ added ++ otherchanges
else do
@@ -238,6 +248,45 @@ handleAdds delayadd cs = returnWhen (null incomplete) $ do
ret _ = (True, Nothing)
add _ = return Nothing
+ {- In direct mode, avoid overhead of re-ingesting a renamed
+ - file, by examining the other Changes to see if a removed
+ - file has the same InodeCache as the new file. If so,
+ - we can just update bookkeeping, and stage the file in git.
+ -}
+ adddirect :: [Change] -> Assistant [Maybe Change]
+ adddirect toadd = do
+ ct <- liftAnnex compareInodeCachesWith
+ m <- liftAnnex $ removedKeysMap ct cs
+ if M.null m
+ then forM toadd add
+ else forM toadd $ \c -> do
+ mcache <- liftIO $ genInodeCache $ changeFile c
+ case mcache of
+ Nothing -> add c
+ Just cache ->
+ case M.lookup (inodeCacheToKey ct cache) m of
+ Nothing -> add c
+ Just k -> fastadd c k cache
+
+ fastadd :: Change -> Key -> InodeCache -> Assistant (Maybe Change)
+ fastadd change key cache = do
+ -- TODO do fast method
+ debug ["rename detected", show change, show key, show cache]
+ add change
+ --return $ Just $ finishedChange change key
+
+ removedKeysMap :: InodeComparisonType -> [Change] -> Annex (M.Map InodeCacheKey Key)
+ removedKeysMap ct l = do
+ mks <- forM (filter isRmChange l) $ \c ->
+ catKeyFile $ changeFile c
+ M.fromList . catMaybes <$> forM (catMaybes mks) mkpair
+ where
+ mkpair k = do
+ mcache <- recordedInodeCache k
+ case mcache of
+ Just cache -> return $ Just (inodeCacheToKey ct cache, k)
+ Nothing -> return Nothing
+
failedingest = do
liftAnnex showEndFail
return Nothing
diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs
index 1d3f17887..e08abc6ad 100644
--- a/Utility/InodeCache.hs
+++ b/Utility/InodeCache.hs
@@ -11,22 +11,46 @@ import Common
import System.Posix.Types
import Utility.QuickCheck
-data InodeCache = InodeCache FileID FileOffset EpochTime
+data InodeCachePrim = InodeCachePrim FileID FileOffset EpochTime
+ deriving (Show, Eq, Ord)
+
+newtype InodeCache = InodeCache InodeCachePrim
deriving (Show)
+{- Inode caches can be compared in two different ways, either weakly
+ - or strongly. -}
+data InodeComparisonType = Weakly | Strongly
+ deriving (Eq, Ord)
+
+{- Strong comparison, including inodes. -}
compareStrong :: InodeCache -> InodeCache -> Bool
-compareStrong (InodeCache inode1 size1 mtime1) (InodeCache inode2 size2 mtime2) =
- inode1 == inode2 && size1 == size2 && mtime1 == mtime2
+compareStrong (InodeCache x) (InodeCache y) = x == y
-{- Weak comparison of the inode caches, comparing the size and mtime, but
- - not the actual inode. Useful when inodes have changed, perhaps
+{- Weak comparison of the inode caches, comparing the size and mtime,
+ - but not the actual inode. Useful when inodes have changed, perhaps
- due to some filesystems being remounted. -}
compareWeak :: InodeCache -> InodeCache -> Bool
-compareWeak (InodeCache _ size1 mtime1) (InodeCache _ size2 mtime2) =
+compareWeak (InodeCache (InodeCachePrim _ size1 mtime1)) (InodeCache (InodeCachePrim _ size2 mtime2)) =
size1 == size2 && mtime1 == mtime2
+compareBy :: InodeComparisonType -> InodeCache -> InodeCache -> Bool
+compareBy Strongly = compareStrong
+compareBy Weakly = compareWeak
+
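+{- For use in a Map; it's determined at creation time whether this
+ - uses strong or weak comparison for Eq. Note that Data.Map orders and
+ - looks up keys using Ord, not Eq, and the derived Ord instance always
+ - compares the inode as well as the size and mtime. -}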
+data InodeCacheKey = InodeCacheKey InodeComparisonType InodeCachePrim
+ deriving (Ord)
+
+instance Eq InodeCacheKey where
+ (InodeCacheKey ctx x) == (InodeCacheKey cty y) =
+ compareBy (maximum [ctx,cty]) (InodeCache x ) (InodeCache y)
+
+inodeCacheToKey :: InodeComparisonType -> InodeCache -> InodeCacheKey
+inodeCacheToKey ct (InodeCache prim) = InodeCacheKey ct prim
+
showInodeCache :: InodeCache -> String
-showInodeCache (InodeCache inode size mtime) = unwords
+showInodeCache (InodeCache (InodeCachePrim inode size mtime)) = unwords
[ show inode
, show size
, show mtime
@@ -34,10 +58,12 @@ showInodeCache (InodeCache inode size mtime) = unwords
readInodeCache :: String -> Maybe InodeCache
readInodeCache s = case words s of
- (inode:size:mtime:_) -> InodeCache
- <$> readish inode
- <*> readish size
- <*> readish mtime
+ (inode:size:mtime:_) ->
+ let prim = InodeCachePrim
+ <$> readish inode
+ <*> readish size
+ <*> readish mtime
+ in InodeCache <$> prim
_ -> Nothing
genInodeCache :: FilePath -> IO (Maybe InodeCache)
@@ -45,17 +71,19 @@ genInodeCache f = catchDefaultIO Nothing $ toInodeCache <$> getFileStatus f
toInodeCache :: FileStatus -> Maybe InodeCache
toInodeCache s
- | isRegularFile s = Just $ InodeCache
+ | isRegularFile s = Just $ InodeCache $ InodeCachePrim
(fileID s)
(fileSize s)
(modificationTime s)
| otherwise = Nothing
instance Arbitrary InodeCache where
- arbitrary = InodeCache
- <$> arbitrary
- <*> arbitrary
- <*> arbitrary
+ arbitrary =
+ let prim = InodeCachePrim
+ <$> arbitrary
+ <*> arbitrary
+ <*> arbitrary
+ in InodeCache <$> prim
prop_read_show_inodecache :: InodeCache -> Bool
prop_read_show_inodecache c = case readInodeCache (showInodeCache c) of
diff --git a/debian/changelog b/debian/changelog
index f8ff2d282..44d3bf68d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -50,6 +50,8 @@ git-annex (4.20130228) UNRELEASED; urgency=low
status of that directory, rather than the whole annex.
* Added remote.<name>.annex-gnupg-options setting.
Thanks, guilhem for the patch.
+ * assistant: Optimised handling of renamed files in direct mode,
+ avoiding re-checksumming.
-- Joey Hess <joeyh@debian.org> Wed, 27 Feb 2013 23:20:40 -0400