From b9ac5854549636493449fea6830364a01159fbf6 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Sun, 11 Dec 2011 23:02:25 -0400
Subject: more efficient union merges

Tries to avoid generating a new object when the merged content has the same
lines that were in the old object.

I've noticed some merge commits that only move lines around, like this:

- 1323478057.181191s 1 be23c3ac-0ee5-11e0-b185-3b0f9b5b00c5
  1323204972.062151s 1 87e06c7a-7388-11e0-ba07-03cdf300bd87
++1323478057.181191s 1 be23c3ac-0ee5-11e0-b185-3b0f9b5b00c5

Unsure if this will really save anything in practice, since it only looks
at one of the two old objects, and maybe I didn't pick the best one.
---
 Git/UnionMerge.hs | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs
index ddbff6a82..27113c85a 100644
--- a/Git/UnionMerge.hs
+++ b/Git/UnionMerge.hs
@@ -104,14 +104,17 @@ mergeFile :: String -> FilePath -> CatFileHandle -> Repo -> IO (Maybe String)
 mergeFile info file h repo = case filter (/= nullsha) [Ref asha, Ref bsha] of
 	[] -> return Nothing
 	(sha:[]) -> return $ Just $ update_index_line sha file
-	shas -> do
-		content <- L.concat <$> mapM (catObject h) shas
-		sha <- hashObject (unionmerge content) repo
-		return $ Just $ update_index_line sha file
+	(sha:shas) -> do
+		origcontent <- L.lines <$> catObject h sha
+		content <- map L.lines <$> mapM (catObject h) shas
+		let newcontent = nub $ concat $ origcontent:content
+		newsha <- if (newcontent == origcontent)
+			then return sha
+			else hashObject (L.unlines $ newcontent) repo
+		return $ Just $ update_index_line newsha file
 	where
-		[_colonamode, _bmode, asha, bsha, _status] = words info
+		[_colonmode, _bmode, asha, bsha, _status] = words info
 		nullsha = Ref $ replicate shaSize '0'
-		unionmerge = L.unlines . nub . L.lines
 
 {- Injects some content into git, returning its Sha. -}
 hashObject :: L.ByteString -> Repo -> IO Sha
-- 
cgit v1.2.3