diff options
author | Joey Hess <joeyh@joeyh.name> | 2016-03-14 15:58:46 -0400 |
---|---|---|
committer | Joey Hess <joeyh@joeyh.name> | 2016-03-14 15:58:46 -0400 |
commit | 90db9740d0d64b34f2e46f57f8aaca235088734c (patch) | |
tree | ac4d7e1b03bfcc337d02569acb87a1481f610746 | |
parent | 9b29bd39c8dbf23bdf6930b51aba13992ccc49de (diff) |
Sped up git-annex add in direct mode and v6 by using git hash-object --batch.
Speeds up hashSymlink and hashPointerFile.
-rw-r--r-- | Annex.hs | 3 | ||||
-rw-r--r-- | Annex/Concurrent.hs | 2 | ||||
-rw-r--r-- | Annex/HashObject.hs | 19 | ||||
-rw-r--r-- | Annex/Link.hs | 12 | ||||
-rw-r--r-- | Annex/View.hs | 20 | ||||
-rw-r--r-- | debian/changelog | 2 |
6 files changed, 25 insertions, 33 deletions
@@ -42,6 +42,7 @@ import qualified Git import qualified Git.Config import Annex.Fixup import Git.CatFile +import Git.HashObject import Git.CheckAttr import Git.CheckIgnore import qualified Git.Hook @@ -106,6 +107,7 @@ data AnnexState = AnnexState , branchstate :: BranchState , repoqueue :: Maybe Git.Queue.Queue , catfilehandles :: M.Map FilePath CatFileHandle + , hashobjecthandle :: Maybe HashObjectHandle , checkattrhandle :: Maybe CheckAttrHandle , checkignorehandle :: Maybe (Maybe CheckIgnoreHandle) , forcebackend :: Maybe String @@ -151,6 +153,7 @@ newState c r = AnnexState , branchstate = startBranchState , repoqueue = Nothing , catfilehandles = M.empty + , hashobjecthandle = Nothing , checkattrhandle = Nothing , checkignorehandle = Nothing , forcebackend = Nothing diff --git a/Annex/Concurrent.hs b/Annex/Concurrent.hs index d5809df45..ee19d4766 100644 --- a/Annex/Concurrent.hs +++ b/Annex/Concurrent.hs @@ -11,6 +11,7 @@ import Annex.Common import Annex import Annex.CatFile import Annex.CheckAttr +import Annex.HashObject import Annex.CheckIgnore import qualified Annex.Queue @@ -64,4 +65,5 @@ mergeState st = do closehandles = do catFileStop checkAttrStop + hashObjectStop checkIgnoreStop diff --git a/Annex/HashObject.hs b/Annex/HashObject.hs index aa8c2a174..16f741407 100644 --- a/Annex/HashObject.hs +++ b/Annex/HashObject.hs @@ -12,38 +12,31 @@ module Annex.HashObject ( hashObjectStop, ) where -import qualified Data.ByteString.Lazy as L -import qualified Data.Map as M -import System.PosixCompat.Types - import Annex.Common -import qualified Git import qualified Git.HashObject import qualified Annex import Git.Types -import Git.FilePath -import qualified Git.Ref -import Annex.Link hashObjectHandle :: Annex Git.HashObject.HashObjectHandle hashObjectHandle = maybe startup return =<< Annex.getState Annex.hashobjecthandle where startup = do - inRepo $ Git.hashObjectStart + h <- inRepo $ Git.HashObject.hashObjectStart Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just h } return h hashObjectStop :: Annex () -hashObjectStop = maybe noop stop =<< Annex.hashobjecthandle +hashObjectStop = maybe noop stop =<< Annex.getState Annex.hashobjecthandle where stop h = do - liftIO $ Git.hashObjectStop h + liftIO $ Git.HashObject.hashObjectStop h Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing } + return () hashFile :: FilePath -> Annex Sha hashFile f = do h <- hashObjectHandle - Git.HashObject.hashFile h f + liftIO $ Git.HashObject.hashFile h f {- Note that the content will be written to a temp file. - So it may be faster to use Git.HashObject.hashObject for large @@ -51,4 +44,4 @@ hashFile f = do hashBlob :: String -> Annex Sha hashBlob content = do h <- hashObjectHandle - Git.HashObject.hashFile h content + liftIO $ Git.HashObject.hashBlob h content diff --git a/Annex/Link.hs b/Annex/Link.hs index 629e77d7f..b191bce6f 100644 --- a/Annex/Link.hs +++ b/Annex/Link.hs @@ -18,11 +18,11 @@ module Annex.Link where import Annex.Common import qualified Annex -import qualified Git.HashObject import qualified Git.UpdateIndex import qualified Annex.Queue import Git.Types import Git.FilePath +import Annex.HashObject import qualified Data.ByteString.Lazy as L import Data.Int @@ -105,12 +105,7 @@ addAnnexLink linktarget file = do {- Injects a symlink target into git, returning its Sha. -} hashSymlink :: LinkTarget -> Annex Sha -hashSymlink linktarget = inRepo $ Git.HashObject.hashObject BlobObject $ - toInternalGitPath linktarget - -hashSymlink' :: Git.HashObject.HashObjectHandle -> LinkTarget -> Annex Sha -hashSymlink' h linktarget = liftIO $ Git.HashObject.hashBlob h $ - toInternalGitPath linktarget +hashSymlink linktarget = hashBlob (toInternalGitPath linktarget) {- Stages a symlink to an annexed object, using a Sha of its target. -} stageSymlink :: FilePath -> Sha -> Annex () @@ -120,8 +115,7 @@ stageSymlink file sha = {- Injects a pointer file content into git, returning its Sha. -} hashPointerFile :: Key -> Annex Sha -hashPointerFile key = inRepo $ Git.HashObject.hashObject BlobObject $ - formatPointer key +hashPointerFile key = hashBlob (formatPointer key) {- Stages a pointer file, using a Sha of its content -} stagePointerFile :: FilePath -> Sha -> Annex () diff --git a/Annex/View.hs b/Annex/View.hs index 14c3eccad..0078c2cad 100644 --- a/Annex/View.hs +++ b/Annex/View.hs @@ -19,7 +19,7 @@ import qualified Git.LsFiles import qualified Git.Ref import Git.UpdateIndex import Git.Sha -import Git.HashObject +import Annex.HashObject import Git.Types import Git.FilePath import Annex.WorkTree @@ -340,38 +340,36 @@ applyView' mkviewedfile getfilemetadata view = do (l, clean) <- inRepo $ Git.LsFiles.inRepo [top] liftIO . nukeFile =<< fromRepo gitAnnexViewIndex uh <- withViewIndex $ inRepo Git.UpdateIndex.startUpdateIndex - hasher <- inRepo hashObjectStart forM_ l $ \f -> do relf <- getTopFilePath <$> inRepo (toTopFilePath f) - go uh hasher relf =<< lookupFile f + go uh relf =<< lookupFile f liftIO $ do - hashObjectStop hasher void $ stopUpdateIndex uh void clean genViewBranch view where genviewedfiles = viewedFiles view mkviewedfile -- enables memoization - go uh hasher f (Just k) = do + go uh f (Just k) = do metadata <- getCurrentMetaData k let metadata' = getfilemetadata f `unionMetaData` metadata forM_ (genviewedfiles f metadata') $ \fv -> do f' <- fromRepo $ fromTopFilePath $ asTopFilePath fv - stagesymlink uh hasher f' =<< calcRepo (gitAnnexLink f' k) - go uh hasher f Nothing + stagesymlink uh f' =<< calcRepo (gitAnnexLink f' k) + go uh f Nothing | "." `isPrefixOf` f = do s <- liftIO $ getSymbolicLinkStatus f if isSymbolicLink s - then stagesymlink uh hasher f =<< liftIO (readSymbolicLink f) + then stagesymlink uh f =<< liftIO (readSymbolicLink f) else do - sha <- liftIO $ Git.HashObject.hashFile hasher f + sha <- hashFile f let blobtype = if isExecutable (fileMode s) then ExecutableBlob else FileBlob liftIO . Git.UpdateIndex.streamUpdateIndex' uh =<< inRepo (Git.UpdateIndex.stageFile sha blobtype f) | otherwise = noop - stagesymlink uh hasher f linktarget = do - sha <- hashSymlink' hasher linktarget + stagesymlink uh f linktarget = do + sha <- hashSymlink linktarget liftIO . Git.UpdateIndex.streamUpdateIndex' uh =<< inRepo (Git.UpdateIndex.stageSymlink f sha) diff --git a/debian/changelog b/debian/changelog index e2c13be40..d5b98c192 100644 --- a/debian/changelog +++ b/debian/changelog @@ -13,6 +13,8 @@ git-annex (6.20160230) UNRELEASED; urgency=medium lost in last release. * Always try to thaw content, even when annex.crippledfilesystem is set. * Correct git-annex info to include unlocked files in v6 repository. + * Sped up git-annex add in direct mode and v6 by using + git hash-object --batch. -- Joey Hess <id@joeyh.name> Mon, 29 Feb 2016 13:00:30 -0400 |