From 7ff89ccfee13dcfe89cbdef83454e880dabd7186 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 29 Sep 2011 23:43:42 -0400 Subject: convert all git read/write functions to use ByteStrings This yields a second or so speedup in unused, find, etc. Seems that even when the ByteString is immediately split and then converted to Strings, it's faster. I may try to push ByteStrings out into more of git-annex gradually, although I suspect most of the time-critical parts are already covered now, and many of the rest rely on libraries that only support Strings. --- Branch.hs | 5 +++-- Command/Unused.hs | 3 ++- Git.hs | 54 ++++++++++++++++++++++++++++++++---------------- Git/ByteString.hs | 62 ------------------------------------------------------- Git/LsFiles.hs | 6 ++---- Git/LsTree.hs | 4 ++-- Git/UnionMerge.hs | 7 +++---- debian/changelog | 2 +- 8 files changed, 49 insertions(+), 94 deletions(-) delete mode 100644 Git/ByteString.hs diff --git a/Branch.hs b/Branch.hs index 92b1fe29e..e4caeece7 100644 --- a/Branch.hs +++ b/Branch.hs @@ -30,6 +30,7 @@ import System.IO import System.IO.Binary import System.Posix.Process import System.Exit +import qualified Data.ByteString.Lazy.Char8 as L import Types.BranchState import qualified Git @@ -181,7 +182,7 @@ siblingBranches :: Annex [String] siblingBranches = do g <- Annex.gitRepo r <- liftIO $ Git.pipeRead g [Param "show-ref", Param name] - return $ map (last . words) (lines r) + return $ map (last . words . L.unpack) (L.lines r) {- Ensures that a given ref has been merged into the index. -} updateRef :: GitRef -> Annex (Maybe String) @@ -196,7 +197,7 @@ updateRef ref Param (name++".."++ref), Params "--oneline -n1" ] - if null diffs + if L.null diffs then return Nothing else do showSideAction $ "merging " ++ Git.refDescribe ref ++ " into " ++ name diff --git a/Command/Unused.hs b/Command/Unused.hs index 0c1ffe603..987f36720 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -16,6 +16,7 @@ import Data.Maybe import System.FilePath import System.Directory import Data.List +import qualified Data.ByteString.Lazy.Char8 as L import Command import Types @@ -172,7 +173,7 @@ excludeReferenced l = do refs = map last . nubBy cmpheads . filter ourbranches . - map words . lines + map words . lines . L.unpack cmpheads a b = head a == head b ourbranchend = '/' : Branch.name ourbranches ws = not $ ourbranchend `isSuffixOf` last ws diff --git a/Git.hs b/Git.hs index 4c4f00e8d..d32aaaa56 100644 --- a/Git.hs +++ b/Git.hs @@ -44,6 +44,7 @@ module Git ( pipeWrite, pipeWriteRead, pipeNullSplit, + pipeNullSplitB, attributes, remotes, remotesAdd, @@ -85,6 +86,7 @@ import Text.Printf import Data.List (isInfixOf, isPrefixOf, isSuffixOf) import System.Exit import System.Posix.Env (setEnv, unsetEnv, getEnv) +import qualified Data.ByteString.Lazy.Char8 as L import Utility import Utility.Path @@ -379,22 +381,41 @@ run repo subcommand params = assertLocal repo $ - Note that this leaves the git process running, and so zombies will - result unless reap is called. -} -pipeRead :: Repo -> [CommandParam] -> IO String +pipeRead :: Repo -> [CommandParam] -> IO L.ByteString pipeRead repo params = assertLocal repo $ do - (_, s) <- pipeFrom "git" $ toCommand $ gitCommandLine repo params - return s + (_, h) <- hPipeFrom "git" $ toCommand $ gitCommandLine repo params + hSetBinaryMode h True + L.hGetContents h {- Runs a git subcommand, feeding it input. - You should call either getProcessStatus or forceSuccess on the PipeHandle. -} -pipeWrite :: Repo -> [CommandParam] -> String -> IO PipeHandle -pipeWrite repo params s = assertLocal repo $ - pipeTo "git" (toCommand $ gitCommandLine repo params) s +pipeWrite :: Repo -> [CommandParam] -> L.ByteString -> IO PipeHandle +pipeWrite repo params s = assertLocal repo $ do + (p, h) <- hPipeTo "git" (toCommand $ gitCommandLine repo params) + L.hPut h s + hClose h + return p {- Runs a git subcommand, feeding it input, and returning its output. - You should call either getProcessStatus or forceSuccess on the PipeHandle. -} -pipeWriteRead :: Repo -> [CommandParam] -> String -> IO (PipeHandle, String) -pipeWriteRead repo params s = assertLocal repo $ - pipeBoth "git" (toCommand $ gitCommandLine repo params) s +pipeWriteRead :: Repo -> [CommandParam] -> L.ByteString -> IO (PipeHandle, L.ByteString) +pipeWriteRead repo params s = assertLocal repo $ do + (p, from, to) <- hPipeBoth "git" (toCommand $ gitCommandLine repo params) + hSetBinaryMode from True + L.hPut to s + hClose to + c <- L.hGetContents from + return (p, c) + +{- Reads null terminated output of a git command (as enabled by the -z + - parameter), and splits it. -} +pipeNullSplit :: Repo -> [CommandParam] -> IO [String] +pipeNullSplit repo params = map L.unpack <$> pipeNullSplitB repo params + +{- For when Strings are not needed. -} +pipeNullSplitB :: Repo -> [CommandParam] -> IO [L.ByteString] +pipeNullSplitB repo params = filter (not . L.null) . L.split '\0' <$> + pipeRead repo params {- Reaps any zombie git processes. -} reap :: IO () @@ -436,21 +457,18 @@ shaSize = 40 - with the specified parent refs. -} commit :: Repo -> String -> String -> [String] -> IO () commit g message newref parentrefs = do - tree <- getSha "write-tree" $ + tree <- getSha "write-tree" $ asString $ pipeRead g [Param "write-tree"] - sha <- getSha "commit-tree" $ ignorehandle $ - pipeWriteRead g (map Param $ ["commit-tree", tree] ++ ps) message + sha <- getSha "commit-tree" $ asString $ + ignorehandle $ pipeWriteRead g + (map Param $ ["commit-tree", tree] ++ ps) + (L.pack message) run g "update-ref" [Param newref, Param sha] where ignorehandle a = snd <$> a + asString a = L.unpack <$> a ps = concatMap (\r -> ["-p", r]) parentrefs -{- Reads null terminated output of a git command (as enabled by the -z - - parameter), and splits it. -} -pipeNullSplit :: Repo -> [CommandParam] -> IO [String] -pipeNullSplit repo params = filter (not . null) . split "\0" <$> - pipeRead repo params - {- Runs git config and populates a repo with its config. -} configRead :: Repo -> IO Repo configRead repo@(Repo { location = Dir d }) = do diff --git a/Git/ByteString.hs b/Git/ByteString.hs deleted file mode 100644 index 4eb6a4876..000000000 --- a/Git/ByteString.hs +++ /dev/null @@ -1,62 +0,0 @@ -{- module using Data.ByteString.Lazy.Char8 for git IO - - - - This can be imported instead of Git when more efficient ByteString IO - - is needed. - - - - Copyright 2011 Joey Hess - - - - Licensed under the GNU GPL version 3 or higher. - -} - -module Git.ByteString ( - module Git, - pipeRead, - pipeWrite, - pipeWriteRead, - pipeNullSplit -) where - -import Control.Applicative -import System.Cmd.Utils -import System.IO -import qualified Data.ByteString.Lazy.Char8 as L - -import Git hiding (pipeRead, pipeWrite, pipeWriteRead, pipeNullSplit) -import Utility.SafeCommand - -{- Runs a git subcommand and returns its output, lazily. - - - - Note that this leaves the git process running, and so zombies will - - result unless reap is called. - -} -pipeRead :: Repo -> [CommandParam] -> IO L.ByteString -pipeRead repo params = assertLocal repo $ do - (_, h) <- hPipeFrom "git" $ toCommand $ gitCommandLine repo params - hSetBinaryMode h True - L.hGetContents h - -{- Runs a git subcommand, feeding it input. - - You should call either getProcessStatus or forceSuccess on the PipeHandle. -} -pipeWrite :: Repo -> [CommandParam] -> L.ByteString -> IO PipeHandle -pipeWrite repo params s = assertLocal repo $ do - (p, h) <- hPipeTo "git" (toCommand $ gitCommandLine repo params) - L.hPut h s - hClose h - return p - -{- Runs a git subcommand, feeding it input, and returning its output. - - You should call either getProcessStatus or forceSuccess on the PipeHandle. -} -pipeWriteRead :: Repo -> [CommandParam] -> L.ByteString -> IO (PipeHandle, L.ByteString) -pipeWriteRead repo params s = assertLocal repo $ do - (p, from, to) <- hPipeBoth "git" (toCommand $ gitCommandLine repo params) - hSetBinaryMode from True - L.hPut to s - hClose to - c <- L.hGetContents from - return (p, c) - -{- Reads null terminated output of a git command (as enabled by the -z - - parameter), and splits it. -} -pipeNullSplit :: Repo -> [CommandParam] -> IO [L.ByteString] -pipeNullSplit repo params = filter (not . L.null) . L.split '\0' <$> - pipeRead repo params diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs index c778e5d69..28e007a4d 100644 --- a/Git/LsFiles.hs +++ b/Git/LsFiles.hs @@ -20,13 +20,11 @@ import Utility.SafeCommand {- Scans for files that are checked into git at the specified locations. -} inRepo :: Repo -> [FilePath] -> IO [FilePath] -inRepo repo l = pipeNullSplit repo $ - Params "ls-files --cached -z --" : map File l +inRepo repo l = pipeNullSplit repo $ Params "ls-files --cached -z --" : map File l {- Scans for files at the specified locations that are not checked into git. -} notInRepo :: Repo -> Bool -> [FilePath] -> IO [FilePath] -notInRepo repo include_ignored l = - pipeNullSplit repo $ +notInRepo repo include_ignored l = pipeNullSplit repo $ [Params "ls-files --others"] ++ exclude ++ [Params "-z --"] ++ map File l where diff --git a/Git/LsTree.hs b/Git/LsTree.hs index e0aa5a443..c072ef5be 100644 --- a/Git/LsTree.hs +++ b/Git/LsTree.hs @@ -16,7 +16,7 @@ import Control.Applicative import System.Posix.Types import qualified Data.ByteString.Lazy.Char8 as L -import Git.ByteString +import Git import Utility.SafeCommand type Treeish = String @@ -31,7 +31,7 @@ data TreeItem = TreeItem {- Lists the contents of a Treeish -} lsTree :: Repo -> Treeish -> IO [TreeItem] lsTree repo t = map parseLsTree <$> - pipeNullSplit repo [Params "ls-tree --full-tree -z -r --", File t] + pipeNullSplitB repo [Params "ls-tree --full-tree -z -r --", File t] {- Parses a line of ls-tree output. - (The --long format is not currently supported.) -} diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs index a2b85dbdc..ac002b374 100644 --- a/Git/UnionMerge.hs +++ b/Git/UnionMerge.hs @@ -19,7 +19,6 @@ import Data.String.Utils import qualified Data.ByteString.Lazy.Char8 as L import Git -import qualified Git.ByteString as GitB import Utility.SafeCommand {- Performs a union merge between two branches, staging it in the index. @@ -44,7 +43,7 @@ merge _ _ = error "wrong number of branches to merge" update_index :: Repo -> [String] -> IO () update_index g l = togit ["update-index", "-z", "--index-info"] (join "\0" l) where - togit ps content = pipeWrite g (map Param ps) content + togit ps content = pipeWrite g (map Param ps) (L.pack content) >>= forceSuccess {- Generates a line suitable to be fed into update-index, to add @@ -83,7 +82,7 @@ calc_merge g differ = do {- Injects some content into git, returning its hash. -} hashObject :: Repo -> L.ByteString -> IO String hashObject repo content = getSha subcmd $ do - (h, s) <- GitB.pipeWriteRead repo (map Param params) content + (h, s) <- pipeWriteRead repo (map Param params) content L.length s `seq` do forceSuccess h reap -- XXX unsure why this is needed @@ -100,7 +99,7 @@ mergeFile g (info, file) = case filter (/= nullsha) [asha, bsha] of [] -> return Nothing (sha:[]) -> return $ Just $ update_index_line sha file shas -> do - content <- GitB.pipeRead g $ map Param ("show":shas) + content <- pipeRead g $ map Param ("show":shas) sha <- hashObject g $ unionmerge content return $ Just $ update_index_line sha file where diff --git a/debian/changelog b/debian/changelog index b793101e1..62b65333d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,6 @@ git-annex (3.20110929) UNRELEASED; urgency=low - * Sped up unused. + * Various speed improvements gained by using ByteStrings. -- Joey Hess Thu, 29 Sep 2011 18:58:53 -0400 -- cgit v1.2.3