summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Annex/Content/Direct.hs6
-rw-r--r--Assistant/Threads/Committer.hs53
-rw-r--r--Utility/InodeCache.hs60
-rw-r--r--debian/changelog2
-rw-r--r--doc/bugs/assistant_does_not_list_remote___39__origin__39__.mdwn2
-rw-r--r--doc/bugs/assistant_ignore_.gitignore.mdwn2
-rw-r--r--doc/bugs/long_running_assistant_causes_resource_starvation_on_OSX.mdwn2
-rw-r--r--doc/bugs/random_files_vanishing_when_assistant_gets_restarted.mdwn6
-rw-r--r--doc/bugs/smarter_flood_filling.mdwn27
-rw-r--r--doc/todo/wishlist:_Tell_git_annex___40__assistant__41___which_files___40__not__41___to_annex_via_.gitattributes.mdwn2
-rw-r--r--doc/todo/wishlist:_an___34__assistant__34___for_web-browsing_--_tracking_the_sources_of_the_downloads.mdwn2
11 files changed, 144 insertions, 20 deletions
diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs
index 164cb64a4..bbf6e310d 100644
--- a/Annex/Content/Direct.hs
+++ b/Annex/Content/Direct.hs
@@ -14,6 +14,7 @@ module Annex.Content.Direct (
updateInodeCache,
writeInodeCache,
compareInodeCaches,
+ compareInodeCachesWith,
sameInodeCache,
sameFileStatus,
removeInodeCache,
@@ -147,12 +148,15 @@ sameFileStatus key status = do
{- If the inodes have changed, only the size and mtime are compared. -}
compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool
compareInodeCaches x y
- | x `compareStrong` y = return True
+ | compareStrong x y = return True
| otherwise = ifM inodesChanged
( return $ compareWeak x y
, return False
)
+{- Picks the comparison type to use for inode caches: Weakly when
+ - inodesChanged reports that the inodes have changed, Strongly otherwise. -}
+compareInodeCachesWith :: Annex InodeComparisonType
+compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly )
+
{- Some filesystems get new inodes each time they are mounted.
- In order to work on such a filesystem, a sentinal file is used to detect
- when the inodes have changed.
diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs
index b336f650b..5be190c4a 100644
--- a/Assistant/Threads/Committer.hs
+++ b/Assistant/Threads/Committer.hs
@@ -32,11 +32,15 @@ import Config
import Annex.Exception
import Annex.Content
import Annex.Link
+import Annex.CatFile
import qualified Annex
+import Utility.InodeCache
+import Annex.Content.Direct
import Data.Time.Clock
import Data.Tuple.Utils
import qualified Data.Set as S
+import qualified Data.Map as M
import Data.Either
import Control.Concurrent
@@ -90,9 +94,13 @@ waitChangeTime a = runEvery (Seconds 1) <~> do
{- Did we perhaps only get one of the AddChange and RmChange pair
- that make up a rename? -}
lonelychange [(PendingAddChange _ _)] = True
- lonelychange [(Change { changeInfo = i })] | i == RmChange = True
+ lonelychange [c] | isRmChange c = True
lonelychange _ = False
+{- True only for a Change whose changeInfo is RmChange. -}
+isRmChange :: Change -> Bool
+isRmChange (Change { changeInfo = i }) | i == RmChange = True
+isRmChange _ = False
+
{- An amount of time that is hopefully imperceptably short for humans,
- while long enough for a computer to get some work done.
- Note that 0.001 is a little too short for rename change batching to
@@ -200,7 +208,9 @@ handleAdds delayadd cs = returnWhen (null incomplete) $ do
refillChanges postponed
returnWhen (null toadd) $ do
- added <- catMaybes <$> forM toadd add
+ added <- catMaybes <$> if direct
+ then adddirect toadd
+ else forM toadd add
if DirWatcher.eventsCoalesce || null added || direct
then return $ added ++ otherchanges
else do
@@ -238,6 +248,45 @@ handleAdds delayadd cs = returnWhen (null incomplete) $ do
ret _ = (True, Nothing)
add _ = return Nothing
+ {- In direct mode, avoid overhead of re-ingesting a renamed
+ - file, by examining the other Changes to see if a removed
+ - file has the same InodeCache as the new file. If so,
+ - we can just update bookkeeping, and stage the file in git.
+ -
+ - When there are no removed files to match against, when no
+ - InodeCache can be generated for a file, or when the file matches
+ - no removed file, the file goes through the regular add.
+ -}
+ adddirect :: [Change] -> Assistant [Maybe Change]
+ adddirect toadd = do
+ ct <- liftAnnex compareInodeCachesWith
+ m <- liftAnnex $ removedKeysMap ct cs
+ if M.null m
+ then forM toadd add
+ else forM toadd $ \c -> do
+ mcache <- liftIO $ genInodeCache $ changeFile c
+ case mcache of
+ Nothing -> add c
+ Just cache ->
+ case M.lookup (inodeCacheToKey ct cache) m of
+ Nothing -> add c
+ Just k -> fastadd c k cache
+
+ {- Called when a rename has been detected for a Change. The fast
+ - method is not implemented yet (see the TODO); for now this only
+ - logs the detection and then falls back to the regular add. -}
+ fastadd :: Change -> Key -> InodeCache -> Assistant (Maybe Change)
+ fastadd change key cache = do
+ -- TODO do fast method
+ debug ["rename detected", show change, show key, show cache]
+ add change
+ --return $ Just $ finishedChange change key
+
+ {- Builds a map from the InodeCacheKey of each removed file to its
+ - Key. Only the RmChange entries of the list are considered; those
+ - for which catKeyFile or recordedInodeCache yields Nothing are
+ - left out of the map. -}
+ removedKeysMap :: InodeComparisonType -> [Change] -> Annex (M.Map InodeCacheKey Key)
+ removedKeysMap ct l = do
+ mks <- forM (filter isRmChange l) $ \c ->
+ catKeyFile $ changeFile c
+ M.fromList . catMaybes <$> forM (catMaybes mks) mkpair
+ where
+ mkpair k = do
+ mcache <- recordedInodeCache k
+ case mcache of
+ Just cache -> return $ Just (inodeCacheToKey ct cache, k)
+ Nothing -> return Nothing
+
failedingest = do
liftAnnex showEndFail
return Nothing
diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs
index 1d3f17887..e08abc6ad 100644
--- a/Utility/InodeCache.hs
+++ b/Utility/InodeCache.hs
@@ -11,22 +11,46 @@ import Common
import System.Posix.Types
import Utility.QuickCheck
-data InodeCache = InodeCache FileID FileOffset EpochTime
+data InodeCachePrim = InodeCachePrim FileID FileOffset EpochTime
+ deriving (Show, Eq, Ord)
+
+newtype InodeCache = InodeCache InodeCachePrim
deriving (Show)
+{- Inode caches can be compared in two different ways, either weakly
+ - or strongly.
+ -
+ - The constructor order matters: with the derived Ord,
+ - Weakly < Strongly. -}
+data InodeComparisonType = Weakly | Strongly
+ deriving (Eq, Ord)
+
+{- Strong comparison, including inodes. -}
compareStrong :: InodeCache -> InodeCache -> Bool
-compareStrong (InodeCache inode1 size1 mtime1) (InodeCache inode2 size2 mtime2) =
- inode1 == inode2 && size1 == size2 && mtime1 == mtime2
+compareStrong (InodeCache x) (InodeCache y) = x == y
-{- Weak comparison of the inode caches, comparing the size and mtime, but
- - not the actual inode. Useful when inodes have changed, perhaps
+{- Weak comparison of the inode caches, comparing the size and mtime,
+ - but not the actual inode. Useful when inodes have changed, perhaps
- due to some filesystems being remounted. -}
compareWeak :: InodeCache -> InodeCache -> Bool
-compareWeak (InodeCache _ size1 mtime1) (InodeCache _ size2 mtime2) =
+compareWeak (InodeCache (InodeCachePrim _ size1 mtime1)) (InodeCache (InodeCachePrim _ size2 mtime2)) =
size1 == size2 && mtime1 == mtime2
+{- Compares two inode caches using the given comparison type. -}
+compareBy :: InodeComparisonType -> InodeCache -> InodeCache -> Bool
+compareBy Strongly = compareStrong
+compareBy Weakly = compareWeak
+
+{- For use in a Map; it's determined at creation time whether this
+ - uses strong or weak comparison for Eq.
+ -
+ - NOTE(review): Ord is derived, so it also compares the inode, while
+ - the Eq instance below ignores the inode when both keys are Weakly.
+ - Two keys can therefore be == without comparing EQ; since Data.Map
+ - orders and looks up keys via Ord, presumably a Weakly key only
+ - matches when the inode matches too -- confirm this is intended. -}
+data InodeCacheKey = InodeCacheKey InodeComparisonType InodeCachePrim
+ deriving (Ord)
+
+{- When the two keys disagree on the comparison type, the greater one
+ - is used, which is Strongly given the constructor order of
+ - InodeComparisonType. -}
+instance Eq InodeCacheKey where
+ (InodeCacheKey ctx x) == (InodeCacheKey cty y) =
+ compareBy (maximum [ctx,cty]) (InodeCache x ) (InodeCache y)
+
+{- Turns an InodeCache into an InodeCacheKey by tagging its underlying
+ - InodeCachePrim with the given comparison type. -}
+inodeCacheToKey :: InodeComparisonType -> InodeCache -> InodeCacheKey
+inodeCacheToKey ct (InodeCache prim) = InodeCacheKey ct prim
+
showInodeCache :: InodeCache -> String
-showInodeCache (InodeCache inode size mtime) = unwords
+showInodeCache (InodeCache (InodeCachePrim inode size mtime)) = unwords
[ show inode
, show size
, show mtime
@@ -34,10 +58,12 @@ showInodeCache (InodeCache inode size mtime) = unwords
readInodeCache :: String -> Maybe InodeCache
readInodeCache s = case words s of
- (inode:size:mtime:_) -> InodeCache
- <$> readish inode
- <*> readish size
- <*> readish mtime
+ (inode:size:mtime:_) ->
+ let prim = InodeCachePrim
+ <$> readish inode
+ <*> readish size
+ <*> readish mtime
+ in InodeCache <$> prim
_ -> Nothing
genInodeCache :: FilePath -> IO (Maybe InodeCache)
@@ -45,17 +71,19 @@ genInodeCache f = catchDefaultIO Nothing $ toInodeCache <$> getFileStatus f
toInodeCache :: FileStatus -> Maybe InodeCache
toInodeCache s
- | isRegularFile s = Just $ InodeCache
+ | isRegularFile s = Just $ InodeCache $ InodeCachePrim
(fileID s)
(fileSize s)
(modificationTime s)
| otherwise = Nothing
instance Arbitrary InodeCache where
- arbitrary = InodeCache
- <$> arbitrary
- <*> arbitrary
- <*> arbitrary
+ arbitrary =
+ let prim = InodeCachePrim
+ <$> arbitrary
+ <*> arbitrary
+ <*> arbitrary
+ in InodeCache <$> prim
prop_read_show_inodecache :: InodeCache -> Bool
prop_read_show_inodecache c = case readInodeCache (showInodeCache c) of
diff --git a/debian/changelog b/debian/changelog
index f8ff2d282..44d3bf68d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -50,6 +50,8 @@ git-annex (4.20130228) UNRELEASED; urgency=low
status of that directory, rather than the whole annex.
* Added remote.<name>.annex-gnupg-options setting.
Thanks, guilhem for the patch.
+ * assistant: Optimised handling of renamed files in direct mode,
+ avoiding re-checksumming.
-- Joey Hess <joeyh@debian.org> Wed, 27 Feb 2013 23:20:40 -0400
diff --git a/doc/bugs/assistant_does_not_list_remote___39__origin__39__.mdwn b/doc/bugs/assistant_does_not_list_remote___39__origin__39__.mdwn
index 3607cbcaf..cd543bd26 100644
--- a/doc/bugs/assistant_does_not_list_remote___39__origin__39__.mdwn
+++ b/doc/bugs/assistant_does_not_list_remote___39__origin__39__.mdwn
@@ -20,3 +20,5 @@ Please provide any additional information below.
I tried both with direct and indirect mode for the local annex repo.
I am sorry if I am missing the point. I checked the docs, however without much success.
+
+[[!tag /design/assistant]]
diff --git a/doc/bugs/assistant_ignore_.gitignore.mdwn b/doc/bugs/assistant_ignore_.gitignore.mdwn
index a32fd457a..00cdffd66 100644
--- a/doc/bugs/assistant_ignore_.gitignore.mdwn
+++ b/doc/bugs/assistant_ignore_.gitignore.mdwn
@@ -25,3 +25,5 @@ What version of git-annex are you using? On what operating system?
> As noted in [[design/assistant/inotify]]'s TODO list, this
> needs an efficient gitignore query interface in git (DNE)
> or a gitignore parser. --[[Joey]]
+
+[[!tag /design/assistant]]
diff --git a/doc/bugs/long_running_assistant_causes_resource_starvation_on_OSX.mdwn b/doc/bugs/long_running_assistant_causes_resource_starvation_on_OSX.mdwn
index 63ccb693b..392090c9d 100644
--- a/doc/bugs/long_running_assistant_causes_resource_starvation_on_OSX.mdwn
+++ b/doc/bugs/long_running_assistant_causes_resource_starvation_on_OSX.mdwn
@@ -22,3 +22,5 @@ Mac OSX 10.8.2 Mountain Lion Build 12C3006
Please provide any additional information below.
I'm really not sure what to look for next. Happy to take suggestions.
+
+[[!tag /design/assistant]]
diff --git a/doc/bugs/random_files_vanishing_when_assistant_gets_restarted.mdwn b/doc/bugs/random_files_vanishing_when_assistant_gets_restarted.mdwn
index ecab8ed0b..deaa340ab 100644
--- a/doc/bugs/random_files_vanishing_when_assistant_gets_restarted.mdwn
+++ b/doc/bugs/random_files_vanishing_when_assistant_gets_restarted.mdwn
@@ -22,9 +22,13 @@ I get messages like:
Already up-to-date.
-Sorry for the german language, I'll try to reproduce it in english, later. After that, the symlinks for the file in the repository are gone. I can get them back by reverting the commit but things like that make me very nervous.
+Sorry for the german language, I'll try to reproduce it in english, later.
+After that, the symlinks for the file in the repository are gone. I can get
+them back by reverting the commit but things like that make me very nervous.
#What version of git-annex are you using? On what operating system?
3.20130102 on Arch Linux x64
+
+[[!tag /design/assistant]]
diff --git a/doc/bugs/smarter_flood_filling.mdwn b/doc/bugs/smarter_flood_filling.mdwn
new file mode 100644
index 000000000..cfff3698f
--- /dev/null
+++ b/doc/bugs/smarter_flood_filling.mdwn
@@ -0,0 +1,27 @@
+The assistant performs a flood fill, sending every file to every remote
+that will have it. This is naive, but it's a good way to ensure the file
+gets to every corner of the repo network that it possibly can.
+
+However, this means that locally paired computers will still upload files
+to a transfer repo, even when they're next to each other and that
+is a massive waste of bandwidth.
+
+It occurred to me this morning that there is a simple change that can avoid
+this.
+
+1. Ensure that locally paired computers have a lower cost than network
+ transfer remotes.
+2. When queuing uploads, queue transfers to the lowest cost remotes first.
+3. Just before starting a transfer, re-check if the transfer is still wanted.
+
+Now, unnecessary transfers to transfer repos are avoided if it can send
+the file locally instead.
+
+It doesn't solve it for all network topologies of course. If there
+are three computers paired in a line "A --- B --- C", and all 3 share
+a transfer repo, A will still send to both B and the transfer repo
+even though B can reach C via a faster route.
+
+See also: [[assistant does not always use repo cost info when queueing downloads]]
+
+[[!tag /design/assistant]]
diff --git a/doc/todo/wishlist:_Tell_git_annex___40__assistant__41___which_files___40__not__41___to_annex_via_.gitattributes.mdwn b/doc/todo/wishlist:_Tell_git_annex___40__assistant__41___which_files___40__not__41___to_annex_via_.gitattributes.mdwn
index 135ec70ee..e377deae7 100644
--- a/doc/todo/wishlist:_Tell_git_annex___40__assistant__41___which_files___40__not__41___to_annex_via_.gitattributes.mdwn
+++ b/doc/todo/wishlist:_Tell_git_annex___40__assistant__41___which_files___40__not__41___to_annex_via_.gitattributes.mdwn
@@ -1,3 +1,5 @@
Title says it all.
It would be nice if I could tell git annex (assistant) which files (not) to annex (automatically).
+
+[[!tag /design/assistant]]
diff --git a/doc/todo/wishlist:_an___34__assistant__34___for_web-browsing_--_tracking_the_sources_of_the_downloads.mdwn b/doc/todo/wishlist:_an___34__assistant__34___for_web-browsing_--_tracking_the_sources_of_the_downloads.mdwn
index ce79aa6a6..c910ace83 100644
--- a/doc/todo/wishlist:_an___34__assistant__34___for_web-browsing_--_tracking_the_sources_of_the_downloads.mdwn
+++ b/doc/todo/wishlist:_an___34__assistant__34___for_web-browsing_--_tracking_the_sources_of_the_downloads.mdwn
@@ -24,3 +24,5 @@ Then I'll have the interesting literature there when I'm offline; the source URL
> The only wrinkle is that the webapp runs under a new url each time
> it starts, due to using a high port and embedding some auth token in the
> url. --[[Joey]]
+
+[[!tag /design/assistant]]