aboutsummaryrefslogtreecommitdiff
path: root/Annex
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2016-09-29 13:36:48 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2016-09-29 13:36:48 -0400
commit4d6f7038b86ebb6c583b8881c2ff47722a32f95f (patch)
tree83f03fb833cbca661c25364ee224c0f6204a1f3d /Annex
parent6cc0c44b26197439d35f3f53c8621f7f113841ab (diff)
Optimisations to git-annex branch query and setting, avoiding repeated copies of the environment.
Speeds up commands like "git-annex find --in remote" by over 50%. Profiling showed that adjustGitEnv was 21% of the time and 37% of the allocations of that command. It copied the environment each time with getEnvironment. The only repeated use of adjustGitEnv is in withIndexFile, which tends to be run at least once per file. So, it was optimised by keeping a cache of the environment, which can be reused. There could be other better ways to optimise this. Maybe get the while environment once at startup. But, then it would have to be serialized back out each time running a child process, so I doubt that would be a net win. It might be better to cache a version of the environment that is pre-modified to use .git-annex/index. But, profiling doesn't show that modifying the enviroment is taking any significant time.
Diffstat (limited to 'Annex')
-rw-r--r--Annex/CatFile.hs2
-rw-r--r--Annex/GitOverlay.hs27
2 files changed, 24 insertions, 5 deletions
diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs
index 8d6237bfe..99d301b4b 100644
--- a/Annex/CatFile.hs
+++ b/Annex/CatFile.hs
@@ -83,7 +83,7 @@ catFileHandle = do
- nothing is using the handles, eg at shutdown. -}
catFileStop :: Annex ()
catFileStop = do
- m <- Annex.withState $ \s ->
+ m <- Annex.withState $ pure . \s ->
(s { Annex.catfilehandles = M.empty }, Annex.catfilehandles s)
liftIO $ mapM_ Git.CatFile.catFileStop (M.elems m)
diff --git a/Annex/GitOverlay.hs b/Annex/GitOverlay.hs
index 1b7ceb2e2..d809e0b23 100644
--- a/Annex/GitOverlay.hs
+++ b/Annex/GitOverlay.hs
@@ -22,9 +22,28 @@ withIndexFile :: FilePath -> Annex a -> Annex a
withIndexFile f a = do
f' <- liftIO $ indexEnvVal f
withAltRepo
- (\g -> addGitEnv g indexEnv f')
+ (usecachedgitenv $ \g -> liftIO $ addGitEnv g indexEnv f')
(\g g' -> g' { gitEnv = gitEnv g })
a
+ where
+ -- This is an optimisation. Since withIndexFile is run repeatedly,
+ -- and addGitEnv uses the slow copyGitEnv when gitEnv is Nothing,
+ -- we cache the copied environment the first time, and reuse it in
+ -- subsequent calls.
+ --
+ -- (This could be done at another level; eg when creating the
+ -- Git object in the first place, but it's more efficient to let
+ -- the enviroment be inherited in all calls to git where it
+ -- does not need to be modified.)
+ usecachedgitenv m g = case gitEnv g of
+ Just _ -> m g
+ Nothing -> do
+ e <- Annex.withState $ \s -> case Annex.cachedgitenv s of
+ Nothing -> do
+ e <- copyGitEnv
+ return (s { Annex.cachedgitenv = Just e }, e)
+ Just e -> return (s, e)
+ m (g { gitEnv = Just e })
{- Runs an action using a different git work tree.
-
@@ -52,7 +71,7 @@ withWorkTree d = withAltRepo
withWorkTreeRelated :: FilePath -> Annex a -> Annex a
withWorkTreeRelated d = withAltRepo modrepo unmodrepo
where
- modrepo g = do
+ modrepo g = liftIO $ do
g' <- addGitEnv g "GIT_COMMON_DIR" =<< absPath (localGitDir g)
g'' <- addGitEnv g' "GIT_DIR" d
return (g'' { gitEnvOverridesGitDir = True })
@@ -62,7 +81,7 @@ withWorkTreeRelated d = withAltRepo modrepo unmodrepo
}
withAltRepo
- :: (Repo -> IO Repo)
+ :: (Repo -> Annex Repo)
-- ^ modify Repo
-> (Repo -> Repo -> Repo)
-- ^ undo modifications; first Repo is the original and second
@@ -71,7 +90,7 @@ withAltRepo
-> Annex a
withAltRepo modrepo unmodrepo a = do
g <- gitRepo
- g' <- liftIO $ modrepo g
+ g' <- modrepo g
q <- Annex.Queue.get
v <- tryNonAsync $ do
Annex.changeState $ \s -> s