From 21a925dcf1ebe088b5c64da0ce159ffb6d535f04 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Nov 2011 23:28:01 -0400 Subject: merge: Now runs in constant space. Before, a merge was first calculated, by running various actions that called git and built up a list of lines, which were at the end sent to git update-index. This necessarily used space proportional to the size of the diff between the trees being merged. Now, lines are streamed into git update-index from each of the actions in turn. Runtime size of git-annex merge when merging 50000 location log files drops from around 100 mb to a constant 4 mb. Presumably it runs quite a lot faster, too. --- Git/UnionMerge.hs | 55 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 24 deletions(-) (limited to 'Git') diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs index 493411a06..c86d69979 100644 --- a/Git/UnionMerge.hs +++ b/Git/UnionMerge.hs @@ -9,13 +9,13 @@ module Git.UnionMerge ( merge, merge_index, update_index, + update_index_via, update_index_line, ls_tree ) where import System.Cmd.Utils import Data.List -import Data.Maybe import qualified Data.ByteString.Lazy.Char8 as L import Common @@ -29,47 +29,56 @@ import Git.CatFile -} merge :: String -> String -> Repo -> IO () merge x y repo = do - a <- ls_tree x repo h <- catFileStart repo - b <- merge_trees x y h repo + update_index_via repo + [ ls_tree x repo + , merge_trees x y h repo + ] catFileStop h - update_index repo (a++b) {- Merges a list of branches into the index. Previously staged changes in - the index are preserved (and participate in the merge). -} merge_index :: CatFileHandle -> Repo -> [String] -> IO () merge_index h repo bs = - update_index repo =<< concat <$> mapM (\b -> merge_tree_index b h repo) bs + update_index_via repo $ map (\b -> merge_tree_index b h repo) bs -{- Feeds a list into update-index. 
Later items in the list can override +update_index :: Repo -> [String] -> IO () +update_index repo ls = update_index_via repo [\h -> mapM_ (sendContent h) ls] + +{- Feeds content into update-index. Later items in the list can override - earlier ones, so the list can be generated from any combination of - ls_tree, merge_trees, and merge_tree_index. -} -update_index :: Repo -> [String] -> IO () -update_index repo l = do +update_index_via :: Repo -> [Handle -> IO ()] -> IO () +update_index_via repo ls = do (p, h) <- hPipeTo "git" (toCommand $ Git.gitCommandLine params repo) - mapM_ (\s -> hPutStr h s >> hPutStr h "\0") l + forM_ ls $ \l -> l h hClose h forceSuccess p where params = map Param ["update-index", "-z", "--index-info"] +sendContent :: Handle -> String -> IO () +sendContent h s = do + hPutStr h s + hPutStr h "\0" + {- Generates a line suitable to be fed into update-index, to add - a given file with a given sha. -} update_index_line :: String -> FilePath -> String update_index_line sha file = "100644 blob " ++ sha ++ "\t" ++ file -{- Gets the contents of a tree in a format suitable for update_index. -} -ls_tree :: String -> Repo -> IO [String] -ls_tree x = pipeNullSplit params +{- Gets the contents of a tree. -} +ls_tree :: String -> Repo -> Handle -> IO () +ls_tree x repo h = mapM_ (sendContent h) =<< pipeNullSplit params repo where params = map Param ["ls-tree", "-z", "-r", "--full-tree", x] {- For merging two trees. -} -merge_trees :: String -> String -> CatFileHandle -> Repo -> IO [String] +merge_trees :: String -> String -> CatFileHandle -> Repo -> Handle -> IO () merge_trees x y h = calc_merge h $ "diff-tree":diff_opts ++ [x, y] {- For merging a single tree into the index. 
-} -merge_tree_index :: String -> CatFileHandle -> Repo -> IO [String] +merge_tree_index :: String -> CatFileHandle -> Repo -> Handle -> IO () merge_tree_index x h = calc_merge h $ "diff-index":diff_opts ++ ["--cached", x] diff_opts :: [String] @@ -77,21 +86,19 @@ diff_opts = ["--raw", "-z", "-r", "--no-renames", "-l0"] {- Calculates how to perform a merge, using git to get a raw diff, - and returning a list suitable for update_index. -} -calc_merge :: CatFileHandle -> [String] -> Repo -> IO [String] -calc_merge h differ repo = do - diff <- pipeNullSplit (map Param differ) repo - l <- mapM (\p -> mergeFile p h repo) (pairs diff) - return $ catMaybes l +calc_merge :: CatFileHandle -> [String] -> Repo -> Handle -> IO () +calc_merge ch differ repo ih = pipeNullSplit (map Param differ) repo >>= go where - pairs [] = [] - pairs (_:[]) = error "calc_merge parse error" - pairs (a:b:rest) = (a,b):pairs rest + go [] = return () + go (info:file:rest) = mergeFile info file ch repo >>= + maybe (go rest) (\l -> sendContent ih l >> go rest) + go (_:[]) = error "calc_merge parse error" {- Given an info line from a git raw diff, and the filename, generates - a line suitable for update_index that union merges the two sides of the - diff. -} -mergeFile :: (String, FilePath) -> CatFileHandle -> Repo -> IO (Maybe String) -mergeFile (info, file) h repo = case filter (/= nullsha) [asha, bsha] of +mergeFile :: String -> FilePath -> CatFileHandle -> Repo -> IO (Maybe String) +mergeFile info file h repo = case filter (/= nullsha) [asha, bsha] of [] -> return Nothing (sha:[]) -> return $ Just $ update_index_line sha file shas -> do -- cgit v1.2.3