summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-11-12 17:45:12 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-11-12 17:45:12 -0400
commit04edae6791b4eddaa77dda2407264dc4434d74b7 (patch)
tree3c42dfa812220f003b53cb47ce4ff1e73dd1f108
parentcea65b9e5bf6bcc9a9350703dbbb0951c6e00c82 (diff)
Optimised union merging; now only runs git cat-file once.
-rw-r--r--Annex/Branch.hs5
-rw-r--r--Annex/CatFile.hs17
-rw-r--r--Command/Unused.hs2
-rw-r--r--Git/CatFile.hs38
-rw-r--r--Git/UnionMerge.hs31
-rw-r--r--debian/changelog1
-rw-r--r--doc/todo/optimise_git-annex_merge.mdwn4
7 files changed, 62 insertions, 36 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index fad818fb0..20134003d 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -149,7 +149,8 @@ update = onceonly $ do
- documentation advises users not to directly
- modify the branch.
-}
- inRepo $ \g -> Git.UnionMerge.merge_index g branches
+ h <- catFileHandle
+ inRepo $ \g -> Git.UnionMerge.merge_index h g branches
ff <- if dirty then return False else tryFastForwardTo refs
unless ff $ inRepo $
Git.commit merge_desc fullname (nub $ fullname:refs)
@@ -280,7 +281,7 @@ get' staleok file = fromcache =<< getCache file
fromjournal Nothing
| staleok = withIndex frombranch
| otherwise = withIndexUpdate $ frombranch >>= cache
- frombranch = catFile fullname file
+ frombranch = L.unpack <$> catFile fullname file
cache content = do
setCache file content
return content
diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs
index 99cc519f5..0541f7269 100644
--- a/Annex/CatFile.hs
+++ b/Annex/CatFile.hs
@@ -6,18 +6,25 @@
-}
module Annex.CatFile (
- catFile
+ catFile,
+ catFileHandle
) where
+import qualified Data.ByteString.Lazy.Char8 as L
+
import Common.Annex
import qualified Git.CatFile
import qualified Annex
-catFile :: String -> FilePath -> Annex String
-catFile branch file = maybe startup go =<< Annex.getState Annex.catfilehandle
+catFile :: String -> FilePath -> Annex L.ByteString
+catFile branch file = do
+ h <- catFileHandle
+ liftIO $ Git.CatFile.catFile h branch file
+
+catFileHandle :: Annex Git.CatFile.CatFileHandle
+catFileHandle = maybe startup return =<< Annex.getState Annex.catfilehandle
where
startup = do
h <- inRepo Git.CatFile.catFileStart
Annex.changeState $ \s -> s { Annex.catfilehandle = Just h }
- go h
- go h = liftIO $ Git.CatFile.catFile h branch file
+ return h
diff --git a/Command/Unused.hs b/Command/Unused.hs
index 9d56d1ff1..34d8ac232 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -197,7 +197,7 @@ getKeysReferencedInGit ref = do
findkeys c (l:ls)
| isSymLink (LsTree.mode l) = do
content <- catFile ref $ LsTree.file l
- case fileKey (takeFileName content) of
+ case fileKey (takeFileName $ L.unpack content) of
Nothing -> findkeys c ls
Just k -> findkeys (k:c) ls
| otherwise = findkeys c ls
diff --git a/Git/CatFile.hs b/Git/CatFile.hs
index 51fa585a8..83c123508 100644
--- a/Git/CatFile.hs
+++ b/Git/CatFile.hs
@@ -9,13 +9,15 @@ module Git.CatFile (
CatFileHandle,
catFileStart,
catFileStop,
- catFile
+ catFile,
+ catObject
) where
import Control.Monad.State
import System.Cmd.Utils
import System.IO
-import qualified Data.ByteString.Char8 as B
+import qualified Data.ByteString.Char8 as S
+import qualified Data.ByteString.Lazy.Char8 as L
import Git
import Utility.SafeCommand
@@ -34,30 +36,38 @@ catFileStop (pid, from, to) = do
hClose from
forceSuccess pid
-{- Uses a running git cat-file read the content of a file from a branch.
- - Files that do not exist on the branch will have "" returned. -}
-catFile :: CatFileHandle -> String -> FilePath -> IO String
-catFile (_, from, to) branch file = do
- hPutStrLn to want
+{- Reads a file from a specified branch. -}
+catFile :: CatFileHandle -> String -> FilePath -> IO L.ByteString
+catFile h branch file = catObject h (branch ++ ":" ++ file)
+
+{- Uses a running git cat-file read the content of an object.
+ - Objects that do not exist will have "" returned. -}
+catObject :: CatFileHandle -> String -> IO L.ByteString
+catObject (_, from, to) object = do
+ hPutStrLn to object
hFlush to
header <- hGetLine from
case words header of
- [sha, blob, size]
+ [sha, objtype, size]
| length sha == Git.shaSize &&
- blob == "blob" -> handle size
+ validobjtype objtype -> handle size
| otherwise -> empty
_
- | header == want ++ " missing" -> empty
+ | header == object ++ " missing" -> empty
| otherwise -> error $ "unknown response from git cat-file " ++ header
where
- want = branch ++ ":" ++ file
handle size = case reads size of
[(bytes, "")] -> readcontent bytes
_ -> empty
readcontent bytes = do
- content <- B.hGet from bytes
+ content <- S.hGet from bytes
c <- hGetChar from
when (c /= '\n') $
error "missing newline from git cat-file"
- return $ B.unpack content
- empty = return ""
+ return $ L.fromChunks [content]
+ empty = return L.empty
+ validobjtype t
+ | t == "blob" = True
+ | t == "commit" = True
+ | t == "tree" = True
+ | otherwise = False
diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs
index 30778d034..67e6fd951 100644
--- a/Git/UnionMerge.hs
+++ b/Git/UnionMerge.hs
@@ -21,6 +21,7 @@ import qualified Data.ByteString.Lazy.Char8 as L
import Common
import Git
+import Git.CatFile
{- Performs a union merge between two branches, staging it in the index.
- Any previously staged changes in the index will be lost.
@@ -30,14 +31,16 @@ import Git
merge :: String -> String -> Repo -> IO ()
merge x y repo = do
a <- ls_tree x repo
- b <- merge_trees x y repo
+ h <- catFileStart repo
+ b <- merge_trees x y h repo
+ catFileStop h
update_index repo (a++b)
{- Merges a list of branches into the index. Previously staged changed in
- the index are preserved (and participate in the merge). -}
-merge_index :: Repo -> [String] -> IO ()
-merge_index repo bs =
- update_index repo =<< concat <$> mapM (`merge_tree_index` repo) bs
+merge_index :: CatFileHandle -> Repo -> [String] -> IO ()
+merge_index h repo bs =
+ update_index repo =<< concat <$> mapM (\b -> merge_tree_index b h repo) bs
{- Feeds a list into update-index. Later items in the list can override
- earlier ones, so the list can be generated from any combination of
@@ -60,22 +63,22 @@ ls_tree x = pipeNullSplit params
params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
{- For merging two trees. -}
-merge_trees :: String -> String -> Repo -> IO [String]
-merge_trees x y = calc_merge $ "diff-tree":diff_opts ++ [x, y]
+merge_trees :: String -> String -> CatFileHandle -> Repo -> IO [String]
+merge_trees x y h = calc_merge h $ "diff-tree":diff_opts ++ [x, y]
{- For merging a single tree into the index. -}
-merge_tree_index :: String -> Repo -> IO [String]
-merge_tree_index x = calc_merge $ "diff-index":diff_opts ++ ["--cached", x]
+merge_tree_index :: String -> CatFileHandle -> Repo -> IO [String]
+merge_tree_index x h = calc_merge h $ "diff-index":diff_opts ++ ["--cached", x]
diff_opts :: [String]
diff_opts = ["--raw", "-z", "-r", "--no-renames", "-l0"]
{- Calculates how to perform a merge, using git to get a raw diff,
- and returning a list suitable for update_index. -}
-calc_merge :: [String] -> Repo -> IO [String]
-calc_merge differ repo = do
+calc_merge :: CatFileHandle -> [String] -> Repo -> IO [String]
+calc_merge h differ repo = do
diff <- pipeNullSplit (map Param differ) repo
- l <- mapM (\p -> mergeFile p repo) (pairs diff)
+ l <- mapM (\p -> mergeFile p h repo) (pairs diff)
return $ catMaybes l
where
pairs [] = []
@@ -97,12 +100,12 @@ hashObject content repo = getSha subcmd $ do
{- Given an info line from a git raw diff, and the filename, generates
- a line suitable for update_index that union merges the two sides of the
- diff. -}
-mergeFile :: (String, FilePath) -> Repo -> IO (Maybe String)
-mergeFile (info, file) repo = case filter (/= nullsha) [asha, bsha] of
+mergeFile :: (String, FilePath) -> CatFileHandle -> Repo -> IO (Maybe String)
+mergeFile (info, file) h repo = case filter (/= nullsha) [asha, bsha] of
[] -> return Nothing
(sha:[]) -> return $ Just $ update_index_line sha file
shas -> do
- content <- pipeRead (map Param ("show":shas)) repo
+ content <- L.concat <$> mapM (catObject h) shas
sha <- hashObject (unionmerge content) repo
return $ Just $ update_index_line sha file
where
diff --git a/debian/changelog b/debian/changelog
index a8ce33435..7ff819a14 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -6,6 +6,7 @@ git-annex (3.20111112) UNRELEASED; urgency=low
no longer needs to auto-merge.
* init: When run in an already initalized repository, and without
a description specified, don't delete the old description.
+ * Optimised union merging; now only runs git cat-file once.
-- Joey Hess <joeyh@debian.org> Sat, 12 Nov 2011 14:50:21 -0400
diff --git a/doc/todo/optimise_git-annex_merge.mdwn b/doc/todo/optimise_git-annex_merge.mdwn
index a2cdfb15f..2df196cfd 100644
--- a/doc/todo/optimise_git-annex_merge.mdwn
+++ b/doc/todo/optimise_git-annex_merge.mdwn
@@ -8,6 +8,10 @@ Instead, I'd like a way to stream multiple objects into git using stdin.
Sometime, should look at either extending git-hash-object to support that,
or possibly look at using git-fast-import instead.
+---
+
`git-annex merge` also runs `git show` once per file that needs to be
merged. This could be reduced to a single call to `git-cat-file --batch`,
There is already a Git.CatFile library that can do this easily. --[[Joey]]
+
+> This is now done, part above remains todo. --[[Joey]]