From 90db9740d0d64b34f2e46f57f8aaca235088734c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 14 Mar 2016 15:58:46 -0400 Subject: Sped up git-annex add in direct mode and v6 by using git hash-object --batch. Speeds up hashSymlink and hashPointerFile. --- Annex/Concurrent.hs | 2 ++ Annex/HashObject.hs | 19 ++++++------------- Annex/Link.hs | 12 +++--------- Annex/View.hs | 20 +++++++++----------- 4 files changed, 20 insertions(+), 33 deletions(-) (limited to 'Annex') diff --git a/Annex/Concurrent.hs b/Annex/Concurrent.hs index d5809df45..ee19d4766 100644 --- a/Annex/Concurrent.hs +++ b/Annex/Concurrent.hs @@ -11,6 +11,7 @@ import Annex.Common import Annex import Annex.CatFile import Annex.CheckAttr +import Annex.HashObject import Annex.CheckIgnore import qualified Annex.Queue @@ -64,4 +65,5 @@ mergeState st = do closehandles = do catFileStop checkAttrStop + hashObjectStop checkIgnoreStop diff --git a/Annex/HashObject.hs b/Annex/HashObject.hs index aa8c2a174..16f741407 100644 --- a/Annex/HashObject.hs +++ b/Annex/HashObject.hs @@ -12,38 +12,31 @@ module Annex.HashObject ( hashObjectStop, ) where -import qualified Data.ByteString.Lazy as L -import qualified Data.Map as M -import System.PosixCompat.Types - import Annex.Common -import qualified Git import qualified Git.HashObject import qualified Annex import Git.Types -import Git.FilePath -import qualified Git.Ref -import Annex.Link hashObjectHandle :: Annex Git.HashObject.HashObjectHandle hashObjectHandle = maybe startup return =<< Annex.getState Annex.hashobjecthandle where startup = do - inRepo $ Git.hashObjectStart + h <- inRepo $ Git.HashObject.hashObjectStart Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just h } return h hashObjectStop :: Annex () -hashObjectStop = maybe noop stop =<< Annex.hashobjecthandle +hashObjectStop = maybe noop stop =<< Annex.getState Annex.hashobjecthandle where stop h = do - liftIO $ Git.hashObjectStop h + liftIO $ Git.HashObject.hashObjectStop h Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing } + return () hashFile :: FilePath -> Annex Sha hashFile f = do h <- hashObjectHandle - Git.HashObject.hashFile h f + liftIO $ Git.HashObject.hashFile h f {- Note that the content will be written to a temp file. - So it may be faster to use Git.HashObject.hashObject for large @@ -51,4 +44,4 @@ hashFile f = do hashBlob :: String -> Annex Sha hashBlob content = do h <- hashObjectHandle - Git.HashObject.hashFile h content + liftIO $ Git.HashObject.hashBlob h content diff --git a/Annex/Link.hs b/Annex/Link.hs index 629e77d7f..b191bce6f 100644 --- a/Annex/Link.hs +++ b/Annex/Link.hs @@ -18,11 +18,11 @@ module Annex.Link where import Annex.Common import qualified Annex -import qualified Git.HashObject import qualified Git.UpdateIndex import qualified Annex.Queue import Git.Types import Git.FilePath +import Annex.HashObject import qualified Data.ByteString.Lazy as L import Data.Int @@ -105,12 +105,7 @@ addAnnexLink linktarget file = do {- Injects a symlink target into git, returning its Sha. -} hashSymlink :: LinkTarget -> Annex Sha -hashSymlink linktarget = inRepo $ Git.HashObject.hashObject BlobObject $ - toInternalGitPath linktarget - -hashSymlink' :: Git.HashObject.HashObjectHandle -> LinkTarget -> Annex Sha -hashSymlink' h linktarget = liftIO $ Git.HashObject.hashBlob h $ - toInternalGitPath linktarget +hashSymlink linktarget = hashBlob (toInternalGitPath linktarget) {- Stages a symlink to an annexed object, using a Sha of its target. -} stageSymlink :: FilePath -> Sha -> Annex () @@ -120,8 +115,7 @@ stageSymlink file sha = {- Injects a pointer file content into git, returning its Sha. -} hashPointerFile :: Key -> Annex Sha -hashPointerFile key = inRepo $ Git.HashObject.hashObject BlobObject $ - formatPointer key +hashPointerFile key = hashBlob (formatPointer key) {- Stages a pointer file, using a Sha of its content -} stagePointerFile :: FilePath -> Sha -> Annex () diff --git a/Annex/View.hs b/Annex/View.hs index 14c3eccad..0078c2cad 100644 --- a/Annex/View.hs +++ b/Annex/View.hs @@ -19,7 +19,7 @@ import qualified Git.LsFiles import qualified Git.Ref import Git.UpdateIndex import Git.Sha -import Git.HashObject +import Annex.HashObject import Git.Types import Git.FilePath import Annex.WorkTree @@ -340,38 +340,36 @@ applyView' mkviewedfile getfilemetadata view = do (l, clean) <- inRepo $ Git.LsFiles.inRepo [top] liftIO . nukeFile =<< fromRepo gitAnnexViewIndex uh <- withViewIndex $ inRepo Git.UpdateIndex.startUpdateIndex - hasher <- inRepo hashObjectStart forM_ l $ \f -> do relf <- getTopFilePath <$> inRepo (toTopFilePath f) - go uh hasher relf =<< lookupFile f + go uh relf =<< lookupFile f liftIO $ do - hashObjectStop hasher void $ stopUpdateIndex uh void clean genViewBranch view where genviewedfiles = viewedFiles view mkviewedfile -- enables memoization - go uh hasher f (Just k) = do + go uh f (Just k) = do metadata <- getCurrentMetaData k let metadata' = getfilemetadata f `unionMetaData` metadata forM_ (genviewedfiles f metadata') $ \fv -> do f' <- fromRepo $ fromTopFilePath $ asTopFilePath fv - stagesymlink uh hasher f' =<< calcRepo (gitAnnexLink f' k) - go uh hasher f Nothing + stagesymlink uh f' =<< calcRepo (gitAnnexLink f' k) + go uh f Nothing | "." `isPrefixOf` f = do s <- liftIO $ getSymbolicLinkStatus f if isSymbolicLink s - then stagesymlink uh hasher f =<< liftIO (readSymbolicLink f) + then stagesymlink uh f =<< liftIO (readSymbolicLink f) else do - sha <- liftIO $ Git.HashObject.hashFile hasher f + sha <- hashFile f let blobtype = if isExecutable (fileMode s) then ExecutableBlob else FileBlob liftIO . Git.UpdateIndex.streamUpdateIndex' uh =<< inRepo (Git.UpdateIndex.stageFile sha blobtype f) | otherwise = noop - stagesymlink uh hasher f linktarget = do - sha <- hashSymlink' hasher linktarget + stagesymlink uh f linktarget = do + sha <- hashSymlink linktarget liftIO . Git.UpdateIndex.streamUpdateIndex' uh =<< inRepo (Git.UpdateIndex.stageSymlink f sha) -- cgit v1.2.3