summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2016-07-17 12:11:05 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2016-07-17 12:24:34 -0400
commit535697fb264705c6a7b291e0b4a2caa630f55d6a (patch)
tree4c791dfd97c0c74842d852df7724a92db6a839d5
parent90764f2baece55ee596175ee6554401aee4651ab (diff)
Speed up startup time by caching the refs that have been merged into the git-annex branch.
This can speed up git-annex commands by as much as a second, depending on the number of remotes.
-rw-r--r--Annex/Branch.hs109
-rw-r--r--Annex/Locations.hs5
-rw-r--r--CHANGELOG6
3 files changed, 82 insertions, 38 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index 0993eaaff..a426c76d8 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -1,6 +1,6 @@
{- management of the git-annex branch
-
- - Copyright 2011-2013 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -30,6 +30,7 @@ import qualified Data.ByteString.Lazy as L
import qualified Data.Set as S
import qualified Data.Map as M
import Data.Bits.Utils
+import Data.Function
import Control.Concurrent (threadDelay)
import Annex.Common
@@ -81,9 +82,9 @@ hasOrigin = inRepo $ Git.Ref.exists originname
hasSibling :: Annex Bool
hasSibling = not . null <$> siblingBranches
-{- List of git-annex (refs, branches), including the main one and any
- - from remotes. Duplicate refs are filtered out. -}
-siblingBranches :: Annex [(Git.Ref, Git.Branch)]
+{- List of git-annex (shas, branches), including the main one and any
+ - from remotes. Duplicates are filtered out. -}
+siblingBranches :: Annex [(Git.Sha, Git.Branch)]
siblingBranches = inRepo $ Git.Ref.matchingUniq [name]
{- Creates the branch, if it does not already exist. -}
@@ -133,40 +134,50 @@ forceUpdate = updateTo =<< siblingBranches
-
- Returns True if any refs were merged in, False otherwise.
-}
-updateTo :: [(Git.Ref, Git.Branch)] -> Annex Bool
+updateTo :: [(Git.Sha, Git.Branch)] -> Annex Bool
updateTo pairs = do
-- ensure branch exists, and get its current ref
branchref <- getBranch
dirty <- journalDirty
ignoredrefs <- getIgnoredRefs
- (refs, branches) <- unzip <$> filterM (isnewer ignoredrefs) pairs
- if null refs
+ let unignoredrefs = excludeset ignoredrefs pairs
+ tomerge <- if null unignoredrefs
+ then return []
+ else do
+ mergedrefs <- getMergedRefs
+ filterM isnewer (excludeset mergedrefs unignoredrefs)
+ if null tomerge
{- Even when no refs need to be merged, the index
- may still be updated if the branch has gotten ahead
- of the index. -}
- then whenM (needUpdateIndex branchref) $ lockJournal $ \jl -> do
- forceUpdateIndex jl branchref
- {- When there are journalled changes
- - as well as the branch being updated,
- - a commit needs to be done. -}
- when dirty $
- go branchref True [] [] jl
- else lockJournal $ go branchref dirty refs branches
- return $ not $ null refs
+ then do
+ whenM (needUpdateIndex branchref) $ lockJournal $ \jl -> do
+ forceUpdateIndex jl branchref
+ {- When there are journalled changes
+ - as well as the branch being updated,
+ - a commit needs to be done. -}
+ when dirty $
+ go branchref True [] jl
+ {- Only needed for a while, to populate the
+ - newly added merged refs cache with already
+ - merged refs. Can be safely removed at any time. -}
+ addMergedRefs unignoredrefs
+ else lockJournal $ go branchref dirty tomerge
+ return $ not $ null tomerge
where
- isnewer ignoredrefs (r, _)
- | S.member r ignoredrefs = return False
- | otherwise = inRepo $ Git.Branch.changed fullname r
- go branchref dirty refs branches jl = withIndex $ do
+ excludeset s = filter (\(r, _) -> S.notMember r s)
+ isnewer (r, _) = inRepo $ Git.Branch.changed fullname r
+ go branchref dirty tomerge jl = withIndex $ do
+ let (refs, branches) = unzip tomerge
cleanjournal <- if dirty then stageJournal jl else return noop
- let merge_desc = if null branches
+ let merge_desc = if null tomerge
then "update"
else "merging " ++
unwords (map Git.Ref.describe branches) ++
" into " ++ fromRef name
localtransitions <- parseTransitionsStrictly "local"
<$> getLocal transitionsLog
- unless (null branches) $ do
+ unless (null tomerge) $ do
showSideAction merge_desc
mapM_ checkBranchDifferences refs
mergeIndex jl refs
@@ -181,6 +192,7 @@ updateTo pairs = do
then updateIndex jl branchref
else commitIndex jl branchref merge_desc commitrefs
)
+ addMergedRefs tomerge
liftIO cleanjournal
{- Gets the content of a file, which may be in the journal, or in the index
@@ -493,21 +505,6 @@ handleTransitions jl localts refs = do
<$> catFile ref transitionsLog
return (ref, ts)
-ignoreRefs :: [Git.Ref] -> Annex ()
-ignoreRefs rs = do
- old <- getIgnoredRefs
- let s = S.unions [old, S.fromList rs]
- f <- fromRepo gitAnnexIgnoredRefs
- replaceFile f $ \tmp -> liftIO $ writeFile tmp $
- unlines $ map fromRef $ S.elems s
-
-getIgnoredRefs :: Annex (S.Set Git.Ref)
-getIgnoredRefs = S.fromList . mapMaybe Git.Sha.extractSha . lines <$> content
- where
- content = do
- f <- fromRepo gitAnnexIgnoredRefs
- liftIO $ catchDefaultIO "" $ readFile f
-
{- Performs the specified transitions on the contents of the index file,
- commits it to the branch, or creates a new branch.
-}
@@ -578,3 +575,41 @@ checkBranchDifferences ref = do
mydiffs <- annexDifferences <$> Annex.getGitConfig
when (theirdiffs /= mydiffs) $
error "Remote repository is tuned in incompatable way; cannot be merged with local repository."
+
+ignoreRefs :: [Git.Sha] -> Annex ()
+ignoreRefs rs = do
+ old <- getIgnoredRefs
+ let s = S.unions [old, S.fromList rs]
+ f <- fromRepo gitAnnexIgnoredRefs
+ replaceFile f $ \tmp -> liftIO $ writeFile tmp $
+ unlines $ map fromRef $ S.elems s
+
+getIgnoredRefs :: Annex (S.Set Git.Sha)
+getIgnoredRefs = S.fromList . mapMaybe Git.Sha.extractSha . lines <$> content
+ where
+ content = do
+ f <- fromRepo gitAnnexIgnoredRefs
+ liftIO $ catchDefaultIO "" $ readFile f
+
+addMergedRefs :: [(Git.Sha, Git.Branch)] -> Annex ()
+addMergedRefs [] = return ()
+addMergedRefs new = do
+ old <- getMergedRefs'
+ -- Keep only the newest sha for each branch.
+ let l = nubBy ((==) `on` snd) (new ++ old)
+ f <- fromRepo gitAnnexMergedRefs
+ replaceFile f $ \tmp -> liftIO $ writeFile tmp $
+ unlines $ map (\(s, b) -> fromRef s ++ '\t' : fromRef b) l
+
+getMergedRefs :: Annex (S.Set Git.Sha)
+getMergedRefs = S.fromList . map fst <$> getMergedRefs'
+
+getMergedRefs' :: Annex [(Git.Sha, Git.Branch)]
+getMergedRefs' = do
+ f <- fromRepo gitAnnexMergedRefs
+ s <- liftIO $ catchDefaultIO "" $ readFile f
+ return $ map parse $ lines s
+ where
+ parse l =
+ let (s, b) = separate (== '\t') l
+ in (Ref s, Ref b)
diff --git a/Annex/Locations.hs b/Annex/Locations.hs
index 9b3856602..058993cd2 100644
--- a/Annex/Locations.hs
+++ b/Annex/Locations.hs
@@ -52,6 +52,7 @@ module Annex.Locations (
gitAnnexIndexStatus,
gitAnnexViewIndex,
gitAnnexViewLog,
+ gitAnnexMergedRefs,
gitAnnexIgnoredRefs,
gitAnnexPidFile,
gitAnnexPidLockFile,
@@ -357,6 +358,10 @@ gitAnnexViewIndex r = gitAnnexDir r </> "viewindex"
gitAnnexViewLog :: Git.Repo -> FilePath
gitAnnexViewLog r = gitAnnexDir r </> "viewlog"
+{- List of refs that have already been merged into the git-annex branch. -}
+gitAnnexMergedRefs :: Git.Repo -> FilePath
+gitAnnexMergedRefs r = gitAnnexDir r </> "mergedrefs"
+
{- List of refs that should not be merged into the git-annex branch. -}
gitAnnexIgnoredRefs :: Git.Repo -> FilePath
gitAnnexIgnoredRefs r = gitAnnexDir r </> "ignoredrefs"
diff --git a/CHANGELOG b/CHANGELOG
index 901d7a04a..f80e85640 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -8,7 +8,11 @@ git-annex (6.20160614) UNRELEASED; urgency=medium
* testremote: Fix crash when testing a freshly made external special remote.
* Remove unnecessary rpaths in the git-annex binary, but only when
it's built using make, not cabal.
- This speeds up git-annex statup time by around 50%.
+ This speeds up git-annex startup time by around 50%.
+ * Speed up startup time by caching the refs that have been merged into
+ the git-annex branch.
+ This can speed up git-annex commands by as much as a second,
+ depending on the number of remotes.
* Remove the EKG build flag, since Gentoo for some reason decided to
enable this flag, depsite it not being intended for production use and
so disabled by default.