summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Joey Hess <joey@kitenet.net> 2011-12-12 01:33:02 -0400
committer Joey Hess <joey@kitenet.net> 2011-12-12 01:59:29 -0400
commit acd7a52dfd2cad24fd946ffcf8c4b1d07eb474ce (patch)
tree 7a93b0f04d270fcc479ec89ff68eed6430270dc2
parent 0cbab5de657e025057dd10b087a874d6b3a7b13e (diff)
always find optimal merge
When I tested b9ac5854549636493449fea6830364a01159fbf6, it didn't find the optimal union merge: the second sha was the one to use, at least in the case I tried. Let's just try all shas to see if any can be reused. I stopped using the expensive nub, so despite the use of sets to sort/uniq file contents, this is probably as fast as or faster than it was before.
-rw-r--r-- Git/UnionMerge.hs 34
-rw-r--r-- debian/changelog 6
2 files changed, 26 insertions, 14 deletions
diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs
index 89fcf83e0..0345af399 100644
--- a/Git/UnionMerge.hs
+++ b/Git/UnionMerge.hs
@@ -15,8 +15,8 @@ module Git.UnionMerge (
) where
import System.Cmd.Utils
-import Data.List
import qualified Data.ByteString.Lazy.Char8 as L
+import qualified Data.Set as S
import Common
import Git
@@ -103,22 +103,14 @@ calc_merge ch differ repo streamer = gendiff >>= go
mergeFile :: String -> FilePath -> CatFileHandle -> Repo -> IO (Maybe String)
mergeFile info file h repo = case filter (/= nullsha) [Ref asha, Ref bsha] of
[] -> return Nothing
- (sha:[]) -> return $ Just $ update_index_line sha file
- (sha:shas) -> do
- newsha <- maybe (return sha) (hashObject repo . L.unlines) =<<
- unionmerge
- <$> (L.lines <$> catObject h sha)
- <*> (map L.lines <$> mapM (catObject h) shas)
- return $ Just $ update_index_line newsha file
+ (sha:[]) -> use sha
+ shas -> use =<< either return (hashObject repo . L.unlines) =<<
+ calcMerge . zip shas <$> mapM getcontents shas
where
[_colonmode, _bmode, asha, bsha, _status] = words info
nullsha = Ref $ replicate shaSize '0'
-
- unionmerge origcontent content
- | newcontent == origcontent = Nothing
- | otherwise = Just newcontent
- where
- newcontent = nub $ concat $ origcontent:content
+ getcontents s = L.lines <$> catObject h s
+ use sha = return $ Just $ update_index_line sha file
{- Injects some content into git, returning its Sha. -}
hashObject :: Repo -> L.ByteString -> IO Sha
@@ -131,3 +123,17 @@ hashObject repo content = getSha subcmd $ do
where
subcmd = "hash-object"
params = [subcmd, "-w", "--stdin"]
+
+{- Calculates a union merge between a list of refs, with contents.
+ -
+ - When possible, reuses the content of an existing ref, rather than
+ - generating new content.
+ -}
+calcMerge :: [(Ref, [L.ByteString])] -> Either Ref [L.ByteString]
+calcMerge shacontents
+ | null reuseable = Right $ new
+ | otherwise = Left $ fst $ head reuseable
+ where
+ reuseable = filter (\c -> sorteduniq (snd c) == new) shacontents
+ new = sorteduniq $ concat $ map snd shacontents
+ sorteduniq = S.toList . S.fromList
diff --git a/debian/changelog b/debian/changelog
index b676c3c3d..db23decbb 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+git-annex (3.20111212) UNRELEASED; urgency=low
+
+ * Union merge now finds the least expensive way to represent the merge.
+
+ -- Joey Hess <joeyh@debian.org> Mon, 12 Dec 2011 01:57:49 -0400
+
git-annex (3.20111211) unstable; urgency=medium
* Fix bug in last version in getting contents from bare repositories.