diff options
author | Joey Hess <joey@kitenet.net> | 2011-11-12 17:45:12 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2011-11-12 17:45:12 -0400 |
commit | 04edae6791b4eddaa77dda2407264dc4434d74b7 (patch) | |
tree | 3c42dfa812220f003b53cb47ce4ff1e73dd1f108 | |
parent | cea65b9e5bf6bcc9a9350703dbbb0951c6e00c82 (diff) |
Optimised union merging; now only runs git cat-file once.
-rw-r--r-- | Annex/Branch.hs | 5 | ||||
-rw-r--r-- | Annex/CatFile.hs | 17 | ||||
-rw-r--r-- | Command/Unused.hs | 2 | ||||
-rw-r--r-- | Git/CatFile.hs | 38 | ||||
-rw-r--r-- | Git/UnionMerge.hs | 31 | ||||
-rw-r--r-- | debian/changelog | 1 | ||||
-rw-r--r-- | doc/todo/optimise_git-annex_merge.mdwn | 4 |
7 files changed, 62 insertions, 36 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs index fad818fb0..20134003d 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -149,7 +149,8 @@ update = onceonly $ do - documentation advises users not to directly - modify the branch. -} - inRepo $ \g -> Git.UnionMerge.merge_index g branches + h <- catFileHandle + inRepo $ \g -> Git.UnionMerge.merge_index h g branches ff <- if dirty then return False else tryFastForwardTo refs unless ff $ inRepo $ Git.commit merge_desc fullname (nub $ fullname:refs) @@ -280,7 +281,7 @@ get' staleok file = fromcache =<< getCache file fromjournal Nothing | staleok = withIndex frombranch | otherwise = withIndexUpdate $ frombranch >>= cache - frombranch = catFile fullname file + frombranch = L.unpack <$> catFile fullname file cache content = do setCache file content return content diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs index 99cc519f5..0541f7269 100644 --- a/Annex/CatFile.hs +++ b/Annex/CatFile.hs @@ -6,18 +6,25 @@ -} module Annex.CatFile ( - catFile + catFile, + catFileHandle ) where +import qualified Data.ByteString.Lazy.Char8 as L + import Common.Annex import qualified Git.CatFile import qualified Annex -catFile :: String -> FilePath -> Annex String -catFile branch file = maybe startup go =<< Annex.getState Annex.catfilehandle +catFile :: String -> FilePath -> Annex L.ByteString +catFile branch file = do + h <- catFileHandle + liftIO $ Git.CatFile.catFile h branch file + +catFileHandle :: Annex Git.CatFile.CatFileHandle +catFileHandle = maybe startup return =<< Annex.getState Annex.catfilehandle where startup = do h <- inRepo Git.CatFile.catFileStart Annex.changeState $ \s -> s { Annex.catfilehandle = Just h } - go h - go h = liftIO $ Git.CatFile.catFile h branch file + return h diff --git a/Command/Unused.hs b/Command/Unused.hs index 9d56d1ff1..34d8ac232 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -197,7 +197,7 @@ getKeysReferencedInGit ref = do findkeys c (l:ls) | isSymLink (LsTree.mode l) = do content <- catFile ref $ LsTree.file l - case fileKey (takeFileName content) of + case fileKey (takeFileName $ L.unpack content) of Nothing -> findkeys c ls Just k -> findkeys (k:c) ls | otherwise = findkeys c ls diff --git a/Git/CatFile.hs b/Git/CatFile.hs index 51fa585a8..83c123508 100644 --- a/Git/CatFile.hs +++ b/Git/CatFile.hs @@ -9,13 +9,15 @@ module Git.CatFile ( CatFileHandle, catFileStart, catFileStop, - catFile + catFile, + catObject ) where import Control.Monad.State import System.Cmd.Utils import System.IO -import qualified Data.ByteString.Char8 as B +import qualified Data.ByteString.Char8 as S +import qualified Data.ByteString.Lazy.Char8 as L import Git import Utility.SafeCommand @@ -34,30 +36,38 @@ catFileStop (pid, from, to) = do hClose from forceSuccess pid -{- Uses a running git cat-file read the content of a file from a branch. - - Files that do not exist on the branch will have "" returned. -} -catFile :: CatFileHandle -> String -> FilePath -> IO String -catFile (_, from, to) branch file = do - hPutStrLn to want +{- Reads a file from a specified branch. -} +catFile :: CatFileHandle -> String -> FilePath -> IO L.ByteString +catFile h branch file = catObject h (branch ++ ":" ++ file) + +{- Uses a running git cat-file read the content of an object. + - Objects that do not exist will have "" returned. -} +catObject :: CatFileHandle -> String -> IO L.ByteString +catObject (_, from, to) object = do + hPutStrLn to object hFlush to header <- hGetLine from case words header of - [sha, blob, size] + [sha, objtype, size] | length sha == Git.shaSize && - blob == "blob" -> handle size + validobjtype objtype -> handle size | otherwise -> empty _ - | header == want ++ " missing" -> empty + | header == object ++ " missing" -> empty | otherwise -> error $ "unknown response from git cat-file " ++ header where - want = branch ++ ":" ++ file handle size = case reads size of [(bytes, "")] -> readcontent bytes _ -> empty readcontent bytes = do - content <- B.hGet from bytes + content <- S.hGet from bytes c <- hGetChar from when (c /= '\n') $ error "missing newline from git cat-file" - return $ B.unpack content - empty = return "" + return $ L.fromChunks [content] + empty = return L.empty + validobjtype t + | t == "blob" = True + | t == "commit" = True + | t == "tree" = True + | otherwise = False diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs index 30778d034..67e6fd951 100644 --- a/Git/UnionMerge.hs +++ b/Git/UnionMerge.hs @@ -21,6 +21,7 @@ import qualified Data.ByteString.Lazy.Char8 as L import Common import Git +import Git.CatFile {- Performs a union merge between two branches, staging it in the index. - Any previously staged changes in the index will be lost. @@ -30,14 +31,16 @@ import Git merge :: String -> String -> Repo -> IO () merge x y repo = do a <- ls_tree x repo - b <- merge_trees x y repo + h <- catFileStart repo + b <- merge_trees x y h repo + catFileStop h update_index repo (a++b) {- Merges a list of branches into the index. Previously staged changed in - the index are preserved (and participate in the merge). -} -merge_index :: Repo -> [String] -> IO () -merge_index repo bs = - update_index repo =<< concat <$> mapM (`merge_tree_index` repo) bs +merge_index :: CatFileHandle -> Repo -> [String] -> IO () +merge_index h repo bs = + update_index repo =<< concat <$> mapM (\b -> merge_tree_index b h repo) bs {- Feeds a list into update-index. Later items in the list can override - earlier ones, so the list can be generated from any combination of @@ -60,22 +63,22 @@ ls_tree x = pipeNullSplit params params = map Param ["ls-tree", "-z", "-r", "--full-tree", x] {- For merging two trees. -} -merge_trees :: String -> String -> Repo -> IO [String] -merge_trees x y = calc_merge $ "diff-tree":diff_opts ++ [x, y] +merge_trees :: String -> String -> CatFileHandle -> Repo -> IO [String] +merge_trees x y h = calc_merge h $ "diff-tree":diff_opts ++ [x, y] {- For merging a single tree into the index. -} -merge_tree_index :: String -> Repo -> IO [String] -merge_tree_index x = calc_merge $ "diff-index":diff_opts ++ ["--cached", x] +merge_tree_index :: String -> CatFileHandle -> Repo -> IO [String] +merge_tree_index x h = calc_merge h $ "diff-index":diff_opts ++ ["--cached", x] diff_opts :: [String] diff_opts = ["--raw", "-z", "-r", "--no-renames", "-l0"] {- Calculates how to perform a merge, using git to get a raw diff, - and returning a list suitable for update_index. -} -calc_merge :: [String] -> Repo -> IO [String] -calc_merge differ repo = do +calc_merge :: CatFileHandle -> [String] -> Repo -> IO [String] +calc_merge h differ repo = do diff <- pipeNullSplit (map Param differ) repo - l <- mapM (\p -> mergeFile p repo) (pairs diff) + l <- mapM (\p -> mergeFile p h repo) (pairs diff) return $ catMaybes l where pairs [] = [] @@ -97,12 +100,12 @@ hashObject content repo = getSha subcmd $ do {- Given an info line from a git raw diff, and the filename, generates - a line suitable for update_index that union merges the two sides of the - diff. -} -mergeFile :: (String, FilePath) -> Repo -> IO (Maybe String) -mergeFile (info, file) repo = case filter (/= nullsha) [asha, bsha] of +mergeFile :: (String, FilePath) -> CatFileHandle -> Repo -> IO (Maybe String) +mergeFile (info, file) h repo = case filter (/= nullsha) [asha, bsha] of [] -> return Nothing (sha:[]) -> return $ Just $ update_index_line sha file shas -> do - content <- pipeRead (map Param ("show":shas)) repo + content <- L.concat <$> mapM (catObject h) shas sha <- hashObject (unionmerge content) repo return $ Just $ update_index_line sha file where diff --git a/debian/changelog b/debian/changelog index a8ce33435..7ff819a14 100644 --- a/debian/changelog +++ b/debian/changelog @@ -6,6 +6,7 @@ git-annex (3.20111112) UNRELEASED; urgency=low no longer needs to auto-merge. * init: When run in an already initalized repository, and without a description specified, don't delete the old description. + * Optimised union merging; now only runs git cat-file once. -- Joey Hess <joeyh@debian.org> Sat, 12 Nov 2011 14:50:21 -0400 diff --git a/doc/todo/optimise_git-annex_merge.mdwn b/doc/todo/optimise_git-annex_merge.mdwn index a2cdfb15f..2df196cfd 100644 --- a/doc/todo/optimise_git-annex_merge.mdwn +++ b/doc/todo/optimise_git-annex_merge.mdwn @@ -8,6 +8,10 @@ Instead, I'd like a way to stream multiple objects into git using stdin. Sometime, should look at either extending git-hash-object to support that, or possibly look at using git-fast-import instead. +--- + `git-annex merge` also runs `git show` once per file that needs to be merged. This could be reduced to a single call to `git-cat-file --batch`, There is already a Git.CatFile library that can do this easily. --[[Joey]] + +> This is now done, part above remains todo. --[[Joey]] |