summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Joey Hess <joeyh@joeyh.name> 2016-03-14 15:58:46 -0400
committer: Joey Hess <joeyh@joeyh.name> 2016-03-14 15:58:46 -0400
commit: 90db9740d0d64b34f2e46f57f8aaca235088734c (patch)
tree: ac4d7e1b03bfcc337d02569acb87a1481f610746
parent: 9b29bd39c8dbf23bdf6930b51aba13992ccc49de (diff)
Sped up git-annex add in direct mode and v6 by using git hash-object --batch.
Speeds up hashSymlink and hashPointerFile.
-rw-r--r-- Annex.hs 3
-rw-r--r-- Annex/Concurrent.hs 2
-rw-r--r-- Annex/HashObject.hs 19
-rw-r--r-- Annex/Link.hs 12
-rw-r--r-- Annex/View.hs 20
-rw-r--r-- debian/changelog 2
6 files changed, 25 insertions, 33 deletions
diff --git a/Annex.hs b/Annex.hs
index 6a3d0cebb..fe6802776 100644
--- a/Annex.hs
+++ b/Annex.hs
@@ -42,6 +42,7 @@ import qualified Git
import qualified Git.Config
import Annex.Fixup
import Git.CatFile
+import Git.HashObject
import Git.CheckAttr
import Git.CheckIgnore
import qualified Git.Hook
@@ -106,6 +107,7 @@ data AnnexState = AnnexState
, branchstate :: BranchState
, repoqueue :: Maybe Git.Queue.Queue
, catfilehandles :: M.Map FilePath CatFileHandle
+ , hashobjecthandle :: Maybe HashObjectHandle
, checkattrhandle :: Maybe CheckAttrHandle
, checkignorehandle :: Maybe (Maybe CheckIgnoreHandle)
, forcebackend :: Maybe String
@@ -151,6 +153,7 @@ newState c r = AnnexState
, branchstate = startBranchState
, repoqueue = Nothing
, catfilehandles = M.empty
+ , hashobjecthandle = Nothing
, checkattrhandle = Nothing
, checkignorehandle = Nothing
, forcebackend = Nothing
diff --git a/Annex/Concurrent.hs b/Annex/Concurrent.hs
index d5809df45..ee19d4766 100644
--- a/Annex/Concurrent.hs
+++ b/Annex/Concurrent.hs
@@ -11,6 +11,7 @@ import Annex.Common
import Annex
import Annex.CatFile
import Annex.CheckAttr
+import Annex.HashObject
import Annex.CheckIgnore
import qualified Annex.Queue
@@ -64,4 +65,5 @@ mergeState st = do
closehandles = do
catFileStop
checkAttrStop
+ hashObjectStop
checkIgnoreStop
diff --git a/Annex/HashObject.hs b/Annex/HashObject.hs
index aa8c2a174..16f741407 100644
--- a/Annex/HashObject.hs
+++ b/Annex/HashObject.hs
@@ -12,38 +12,31 @@ module Annex.HashObject (
hashObjectStop,
) where
-import qualified Data.ByteString.Lazy as L
-import qualified Data.Map as M
-import System.PosixCompat.Types
-
import Annex.Common
-import qualified Git
import qualified Git.HashObject
import qualified Annex
import Git.Types
-import Git.FilePath
-import qualified Git.Ref
-import Annex.Link
hashObjectHandle :: Annex Git.HashObject.HashObjectHandle
hashObjectHandle = maybe startup return =<< Annex.getState Annex.hashobjecthandle
where
startup = do
- inRepo $ Git.hashObjectStart
+ h <- inRepo $ Git.HashObject.hashObjectStart
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just h }
return h
hashObjectStop :: Annex ()
-hashObjectStop = maybe noop stop =<< Annex.hashobjecthandle
+hashObjectStop = maybe noop stop =<< Annex.getState Annex.hashobjecthandle
where
stop h = do
- liftIO $ Git.hashObjectStop h
+ liftIO $ Git.HashObject.hashObjectStop h
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing }
+ return ()
hashFile :: FilePath -> Annex Sha
hashFile f = do
h <- hashObjectHandle
- Git.HashObject.hashFile h f
+ liftIO $ Git.HashObject.hashFile h f
{- Note that the content will be written to a temp file.
- So it may be faster to use Git.HashObject.hashObject for large
@@ -51,4 +44,4 @@ hashFile f = do
hashBlob :: String -> Annex Sha
hashBlob content = do
h <- hashObjectHandle
- Git.HashObject.hashFile h content
+ liftIO $ Git.HashObject.hashBlob h content
diff --git a/Annex/Link.hs b/Annex/Link.hs
index 629e77d7f..b191bce6f 100644
--- a/Annex/Link.hs
+++ b/Annex/Link.hs
@@ -18,11 +18,11 @@ module Annex.Link where
import Annex.Common
import qualified Annex
-import qualified Git.HashObject
import qualified Git.UpdateIndex
import qualified Annex.Queue
import Git.Types
import Git.FilePath
+import Annex.HashObject
import qualified Data.ByteString.Lazy as L
import Data.Int
@@ -105,12 +105,7 @@ addAnnexLink linktarget file = do
{- Injects a symlink target into git, returning its Sha. -}
hashSymlink :: LinkTarget -> Annex Sha
-hashSymlink linktarget = inRepo $ Git.HashObject.hashObject BlobObject $
- toInternalGitPath linktarget
-
-hashSymlink' :: Git.HashObject.HashObjectHandle -> LinkTarget -> Annex Sha
-hashSymlink' h linktarget = liftIO $ Git.HashObject.hashBlob h $
- toInternalGitPath linktarget
+hashSymlink linktarget = hashBlob (toInternalGitPath linktarget)
{- Stages a symlink to an annexed object, using a Sha of its target. -}
stageSymlink :: FilePath -> Sha -> Annex ()
@@ -120,8 +115,7 @@ stageSymlink file sha =
{- Injects a pointer file content into git, returning its Sha. -}
hashPointerFile :: Key -> Annex Sha
-hashPointerFile key = inRepo $ Git.HashObject.hashObject BlobObject $
- formatPointer key
+hashPointerFile key = hashBlob (formatPointer key)
{- Stages a pointer file, using a Sha of its content -}
stagePointerFile :: FilePath -> Sha -> Annex ()
diff --git a/Annex/View.hs b/Annex/View.hs
index 14c3eccad..0078c2cad 100644
--- a/Annex/View.hs
+++ b/Annex/View.hs
@@ -19,7 +19,7 @@ import qualified Git.LsFiles
import qualified Git.Ref
import Git.UpdateIndex
import Git.Sha
-import Git.HashObject
+import Annex.HashObject
import Git.Types
import Git.FilePath
import Annex.WorkTree
@@ -340,38 +340,36 @@ applyView' mkviewedfile getfilemetadata view = do
(l, clean) <- inRepo $ Git.LsFiles.inRepo [top]
liftIO . nukeFile =<< fromRepo gitAnnexViewIndex
uh <- withViewIndex $ inRepo Git.UpdateIndex.startUpdateIndex
- hasher <- inRepo hashObjectStart
forM_ l $ \f -> do
relf <- getTopFilePath <$> inRepo (toTopFilePath f)
- go uh hasher relf =<< lookupFile f
+ go uh relf =<< lookupFile f
liftIO $ do
- hashObjectStop hasher
void $ stopUpdateIndex uh
void clean
genViewBranch view
where
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
- go uh hasher f (Just k) = do
+ go uh f (Just k) = do
metadata <- getCurrentMetaData k
let metadata' = getfilemetadata f `unionMetaData` metadata
forM_ (genviewedfiles f metadata') $ \fv -> do
f' <- fromRepo $ fromTopFilePath $ asTopFilePath fv
- stagesymlink uh hasher f' =<< calcRepo (gitAnnexLink f' k)
- go uh hasher f Nothing
+ stagesymlink uh f' =<< calcRepo (gitAnnexLink f' k)
+ go uh f Nothing
| "." `isPrefixOf` f = do
s <- liftIO $ getSymbolicLinkStatus f
if isSymbolicLink s
- then stagesymlink uh hasher f =<< liftIO (readSymbolicLink f)
+ then stagesymlink uh f =<< liftIO (readSymbolicLink f)
else do
- sha <- liftIO $ Git.HashObject.hashFile hasher f
+ sha <- hashFile f
let blobtype = if isExecutable (fileMode s)
then ExecutableBlob
else FileBlob
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
=<< inRepo (Git.UpdateIndex.stageFile sha blobtype f)
| otherwise = noop
- stagesymlink uh hasher f linktarget = do
- sha <- hashSymlink' hasher linktarget
+ stagesymlink uh f linktarget = do
+ sha <- hashSymlink linktarget
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
=<< inRepo (Git.UpdateIndex.stageSymlink f sha)
diff --git a/debian/changelog b/debian/changelog
index e2c13be40..d5b98c192 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -13,6 +13,8 @@ git-annex (6.20160230) UNRELEASED; urgency=medium
lost in last release.
* Always try to thaw content, even when annex.crippledfilesystem is set.
* Correct git-annex info to include unlocked files in v6 repository.
+ * Sped up git-annex add in direct mode and v6 by using
+ git hash-object --batch.
-- Joey Hess <id@joeyh.name> Mon, 29 Feb 2016 13:00:30 -0400