summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-09-29 23:43:42 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-09-29 23:48:57 -0400
commit7ff89ccfee13dcfe89cbdef83454e880dabd7186 (patch)
tree953858f36ccd021832d286c6a5915e44a05081ce
parent949ef94d5e5583e55d6ba9797cf71177b252173d (diff)
convert all git read/write functions to use ByteStrings
This yields a second or so speedup in unused, find, etc. Seems that even when the ByteString is immediately split and then converted to Strings, it's faster. I may try to push ByteStrings out into more of git-annex gradually, although I suspect most of the time-critical parts are already covered now, and many of the rest rely on libraries that only support Strings.
-rw-r--r--Branch.hs5
-rw-r--r--Command/Unused.hs3
-rw-r--r--Git.hs54
-rw-r--r--Git/ByteString.hs62
-rw-r--r--Git/LsFiles.hs6
-rw-r--r--Git/LsTree.hs4
-rw-r--r--Git/UnionMerge.hs7
-rw-r--r--debian/changelog2
8 files changed, 49 insertions, 94 deletions
diff --git a/Branch.hs b/Branch.hs
index 92b1fe29e..e4caeece7 100644
--- a/Branch.hs
+++ b/Branch.hs
@@ -30,6 +30,7 @@ import System.IO
import System.IO.Binary
import System.Posix.Process
import System.Exit
+import qualified Data.ByteString.Lazy.Char8 as L
import Types.BranchState
import qualified Git
@@ -181,7 +182,7 @@ siblingBranches :: Annex [String]
siblingBranches = do
g <- Annex.gitRepo
r <- liftIO $ Git.pipeRead g [Param "show-ref", Param name]
- return $ map (last . words) (lines r)
+ return $ map (last . words . L.unpack) (L.lines r)
{- Ensures that a given ref has been merged into the index. -}
updateRef :: GitRef -> Annex (Maybe String)
@@ -196,7 +197,7 @@ updateRef ref
Param (name++".."++ref),
Params "--oneline -n1"
]
- if null diffs
+ if L.null diffs
then return Nothing
else do
showSideAction $ "merging " ++ Git.refDescribe ref ++ " into " ++ name
diff --git a/Command/Unused.hs b/Command/Unused.hs
index 0c1ffe603..987f36720 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -16,6 +16,7 @@ import Data.Maybe
import System.FilePath
import System.Directory
import Data.List
+import qualified Data.ByteString.Lazy.Char8 as L
import Command
import Types
@@ -172,7 +173,7 @@ excludeReferenced l = do
refs = map last .
nubBy cmpheads .
filter ourbranches .
- map words . lines
+ map words . lines . L.unpack
cmpheads a b = head a == head b
ourbranchend = '/' : Branch.name
ourbranches ws = not $ ourbranchend `isSuffixOf` last ws
diff --git a/Git.hs b/Git.hs
index 4c4f00e8d..d32aaaa56 100644
--- a/Git.hs
+++ b/Git.hs
@@ -44,6 +44,7 @@ module Git (
pipeWrite,
pipeWriteRead,
pipeNullSplit,
+ pipeNullSplitB,
attributes,
remotes,
remotesAdd,
@@ -85,6 +86,7 @@ import Text.Printf
import Data.List (isInfixOf, isPrefixOf, isSuffixOf)
import System.Exit
import System.Posix.Env (setEnv, unsetEnv, getEnv)
+import qualified Data.ByteString.Lazy.Char8 as L
import Utility
import Utility.Path
@@ -379,22 +381,41 @@ run repo subcommand params = assertLocal repo $
- Note that this leaves the git process running, and so zombies will
- result unless reap is called.
-}
-pipeRead :: Repo -> [CommandParam] -> IO String
+pipeRead :: Repo -> [CommandParam] -> IO L.ByteString
pipeRead repo params = assertLocal repo $ do
- (_, s) <- pipeFrom "git" $ toCommand $ gitCommandLine repo params
- return s
+ (_, h) <- hPipeFrom "git" $ toCommand $ gitCommandLine repo params
+ hSetBinaryMode h True
+ L.hGetContents h
{- Runs a git subcommand, feeding it input.
- You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
-pipeWrite :: Repo -> [CommandParam] -> String -> IO PipeHandle
-pipeWrite repo params s = assertLocal repo $
- pipeTo "git" (toCommand $ gitCommandLine repo params) s
+pipeWrite :: Repo -> [CommandParam] -> L.ByteString -> IO PipeHandle
+pipeWrite repo params s = assertLocal repo $ do
+ (p, h) <- hPipeTo "git" (toCommand $ gitCommandLine repo params)
+ L.hPut h s
+ hClose h
+ return p
{- Runs a git subcommand, feeding it input, and returning its output.
- You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
-pipeWriteRead :: Repo -> [CommandParam] -> String -> IO (PipeHandle, String)
-pipeWriteRead repo params s = assertLocal repo $
- pipeBoth "git" (toCommand $ gitCommandLine repo params) s
+pipeWriteRead :: Repo -> [CommandParam] -> L.ByteString -> IO (PipeHandle, L.ByteString)
+pipeWriteRead repo params s = assertLocal repo $ do
+ (p, from, to) <- hPipeBoth "git" (toCommand $ gitCommandLine repo params)
+ hSetBinaryMode from True
+ L.hPut to s
+ hClose to
+ c <- L.hGetContents from
+ return (p, c)
+
+{- Reads null terminated output of a git command (as enabled by the -z
+ - parameter), and splits it. -}
+pipeNullSplit :: Repo -> [CommandParam] -> IO [String]
+pipeNullSplit repo params = map L.unpack <$> pipeNullSplitB repo params
+
+{- For when Strings are not needed. -}
+pipeNullSplitB :: Repo -> [CommandParam] -> IO [L.ByteString]
+pipeNullSplitB repo params = filter (not . L.null) . L.split '\0' <$>
+ pipeRead repo params
{- Reaps any zombie git processes. -}
reap :: IO ()
@@ -436,21 +457,18 @@ shaSize = 40
- with the specified parent refs. -}
commit :: Repo -> String -> String -> [String] -> IO ()
commit g message newref parentrefs = do
- tree <- getSha "write-tree" $
+ tree <- getSha "write-tree" $ asString $
pipeRead g [Param "write-tree"]
- sha <- getSha "commit-tree" $ ignorehandle $
- pipeWriteRead g (map Param $ ["commit-tree", tree] ++ ps) message
+ sha <- getSha "commit-tree" $ asString $
+ ignorehandle $ pipeWriteRead g
+ (map Param $ ["commit-tree", tree] ++ ps)
+ (L.pack message)
run g "update-ref" [Param newref, Param sha]
where
ignorehandle a = snd <$> a
+ asString a = L.unpack <$> a
ps = concatMap (\r -> ["-p", r]) parentrefs
-{- Reads null terminated output of a git command (as enabled by the -z
- - parameter), and splits it. -}
-pipeNullSplit :: Repo -> [CommandParam] -> IO [String]
-pipeNullSplit repo params = filter (not . null) . split "\0" <$>
- pipeRead repo params
-
{- Runs git config and populates a repo with its config. -}
configRead :: Repo -> IO Repo
configRead repo@(Repo { location = Dir d }) = do
diff --git a/Git/ByteString.hs b/Git/ByteString.hs
deleted file mode 100644
index 4eb6a4876..000000000
--- a/Git/ByteString.hs
+++ /dev/null
@@ -1,62 +0,0 @@
-{- module using Data.ByteString.Lazy.Char8 for git IO
- -
- - This can be imported instead of Git when more efficient ByteString IO
- - is needed.
- -
- - Copyright 2011 Joey Hess <joey@kitenet.net>
- -
- - Licensed under the GNU GPL version 3 or higher.
- -}
-
-module Git.ByteString (
- module Git,
- pipeRead,
- pipeWrite,
- pipeWriteRead,
- pipeNullSplit
-) where
-
-import Control.Applicative
-import System.Cmd.Utils
-import System.IO
-import qualified Data.ByteString.Lazy.Char8 as L
-
-import Git hiding (pipeRead, pipeWrite, pipeWriteRead, pipeNullSplit)
-import Utility.SafeCommand
-
-{- Runs a git subcommand and returns its output, lazily.
- -
- - Note that this leaves the git process running, and so zombies will
- - result unless reap is called.
- -}
-pipeRead :: Repo -> [CommandParam] -> IO L.ByteString
-pipeRead repo params = assertLocal repo $ do
- (_, h) <- hPipeFrom "git" $ toCommand $ gitCommandLine repo params
- hSetBinaryMode h True
- L.hGetContents h
-
-{- Runs a git subcommand, feeding it input.
- - You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
-pipeWrite :: Repo -> [CommandParam] -> L.ByteString -> IO PipeHandle
-pipeWrite repo params s = assertLocal repo $ do
- (p, h) <- hPipeTo "git" (toCommand $ gitCommandLine repo params)
- L.hPut h s
- hClose h
- return p
-
-{- Runs a git subcommand, feeding it input, and returning its output.
- - You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
-pipeWriteRead :: Repo -> [CommandParam] -> L.ByteString -> IO (PipeHandle, L.ByteString)
-pipeWriteRead repo params s = assertLocal repo $ do
- (p, from, to) <- hPipeBoth "git" (toCommand $ gitCommandLine repo params)
- hSetBinaryMode from True
- L.hPut to s
- hClose to
- c <- L.hGetContents from
- return (p, c)
-
-{- Reads null terminated output of a git command (as enabled by the -z
- - parameter), and splits it. -}
-pipeNullSplit :: Repo -> [CommandParam] -> IO [L.ByteString]
-pipeNullSplit repo params = filter (not . L.null) . L.split '\0' <$>
- pipeRead repo params
diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs
index c778e5d69..28e007a4d 100644
--- a/Git/LsFiles.hs
+++ b/Git/LsFiles.hs
@@ -20,13 +20,11 @@ import Utility.SafeCommand
{- Scans for files that are checked into git at the specified locations. -}
inRepo :: Repo -> [FilePath] -> IO [FilePath]
-inRepo repo l = pipeNullSplit repo $
- Params "ls-files --cached -z --" : map File l
+inRepo repo l = pipeNullSplit repo $ Params "ls-files --cached -z --" : map File l
{- Scans for files at the specified locations that are not checked into git. -}
notInRepo :: Repo -> Bool -> [FilePath] -> IO [FilePath]
-notInRepo repo include_ignored l =
- pipeNullSplit repo $
+notInRepo repo include_ignored l = pipeNullSplit repo $
[Params "ls-files --others"] ++ exclude ++
[Params "-z --"] ++ map File l
where
diff --git a/Git/LsTree.hs b/Git/LsTree.hs
index e0aa5a443..c072ef5be 100644
--- a/Git/LsTree.hs
+++ b/Git/LsTree.hs
@@ -16,7 +16,7 @@ import Control.Applicative
import System.Posix.Types
import qualified Data.ByteString.Lazy.Char8 as L
-import Git.ByteString
+import Git
import Utility.SafeCommand
type Treeish = String
@@ -31,7 +31,7 @@ data TreeItem = TreeItem
{- Lists the contents of a Treeish -}
lsTree :: Repo -> Treeish -> IO [TreeItem]
lsTree repo t = map parseLsTree <$>
- pipeNullSplit repo [Params "ls-tree --full-tree -z -r --", File t]
+ pipeNullSplitB repo [Params "ls-tree --full-tree -z -r --", File t]
{- Parses a line of ls-tree output.
- (The --long format is not currently supported.) -}
diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs
index a2b85dbdc..ac002b374 100644
--- a/Git/UnionMerge.hs
+++ b/Git/UnionMerge.hs
@@ -19,7 +19,6 @@ import Data.String.Utils
import qualified Data.ByteString.Lazy.Char8 as L
import Git
-import qualified Git.ByteString as GitB
import Utility.SafeCommand
{- Performs a union merge between two branches, staging it in the index.
@@ -44,7 +43,7 @@ merge _ _ = error "wrong number of branches to merge"
update_index :: Repo -> [String] -> IO ()
update_index g l = togit ["update-index", "-z", "--index-info"] (join "\0" l)
where
- togit ps content = pipeWrite g (map Param ps) content
+ togit ps content = pipeWrite g (map Param ps) (L.pack content)
>>= forceSuccess
{- Generates a line suitable to be fed into update-index, to add
@@ -83,7 +82,7 @@ calc_merge g differ = do
{- Injects some content into git, returning its hash. -}
hashObject :: Repo -> L.ByteString -> IO String
hashObject repo content = getSha subcmd $ do
- (h, s) <- GitB.pipeWriteRead repo (map Param params) content
+ (h, s) <- pipeWriteRead repo (map Param params) content
L.length s `seq` do
forceSuccess h
reap -- XXX unsure why this is needed
@@ -100,7 +99,7 @@ mergeFile g (info, file) = case filter (/= nullsha) [asha, bsha] of
[] -> return Nothing
(sha:[]) -> return $ Just $ update_index_line sha file
shas -> do
- content <- GitB.pipeRead g $ map Param ("show":shas)
+ content <- pipeRead g $ map Param ("show":shas)
sha <- hashObject g $ unionmerge content
return $ Just $ update_index_line sha file
where
diff --git a/debian/changelog b/debian/changelog
index b793101e1..62b65333d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,6 +1,6 @@
git-annex (3.20110929) UNRELEASED; urgency=low
- * Sped up unused.
+ * Various speed improvements gained by using ByteStrings.
-- Joey Hess <joeyh@debian.org> Thu, 29 Sep 2011 18:58:53 -0400