summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Annex/Branch.hs3
-rw-r--r--Command/Status.hs2
-rw-r--r--Git/UnionMerge.hs86
-rw-r--r--Logs/Trust.hs5
-rw-r--r--Logs/UUID.hs6
-rw-r--r--debian/changelog1
-rw-r--r--doc/todo/optimise_git-annex_merge.mdwn6
7 files changed, 69 insertions, 40 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index 20134003d..ae33f66cf 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -56,7 +56,8 @@ index g = gitAnnexDir g </> "index"
- and merge in changes from other branches.
-}
genIndex :: Git.Repo -> IO ()
-genIndex g = Git.UnionMerge.ls_tree fullname g >>= Git.UnionMerge.update_index g
+genIndex g = Git.UnionMerge.update_index_via g
+ [Git.UnionMerge.ls_tree fullname g]
{- Runs an action using the branch's index file. -}
withIndex :: Annex a -> Annex a
diff --git a/Command/Status.hs b/Command/Status.hs
index 2c0afa62c..47b500b90 100644
--- a/Command/Status.hs
+++ b/Command/Status.hs
@@ -102,7 +102,7 @@ supported_remote_types = stat "supported remote types" $
remote_list :: TrustLevel -> String -> Stat
remote_list level desc = stat n $ lift $ do
- us <- M.keys <$> uuidMap
+ us <- uuidList
rs <- fst <$> trustPartition level us
s <- prettyPrintUUIDs n rs
return $ if null s then "0" else show (length rs) ++ "\n" ++ init s
diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs
index 67e6fd951..a2344e59d 100644
--- a/Git/UnionMerge.hs
+++ b/Git/UnionMerge.hs
@@ -9,14 +9,13 @@ module Git.UnionMerge (
merge,
merge_index,
update_index,
+ update_index_via,
update_index_line,
ls_tree
) where
import System.Cmd.Utils
import Data.List
-import Data.Maybe
-import Data.String.Utils
import qualified Data.ByteString.Lazy.Char8 as L
import Common
@@ -30,44 +29,58 @@ import Git.CatFile
-}
merge :: String -> String -> Repo -> IO ()
merge x y repo = do
- a <- ls_tree x repo
h <- catFileStart repo
- b <- merge_trees x y h repo
+ update_index_via repo
+ [ ls_tree x repo
+ , merge_trees x y h repo
+ ]
catFileStop h
- update_index repo (a++b)
{- Merges a list of branches into the index. Previously staged changes in
- the index are preserved (and participate in the merge). -}
merge_index :: CatFileHandle -> Repo -> [String] -> IO ()
merge_index h repo bs =
- update_index repo =<< concat <$> mapM (\b -> merge_tree_index b h repo) bs
+ update_index_via repo $ map (\b -> merge_tree_index b h repo) bs
-{- Feeds a list into update-index. Later items in the list can override
+{- Feeds content into update-index. Later items in the list can override
- earlier ones, so the list can be generated from any combination of
- ls_tree, merge_trees, and merge_tree_index. -}
update_index :: Repo -> [String] -> IO ()
-update_index repo l = togit ["update-index", "-z", "--index-info"] (join "\0" l)
+update_index repo ls = update_index_via repo [\s -> mapM_ s ls]
+
+type Streamer = (String -> IO ()) -> IO ()
+
+{- Streams content into update-index. -}
+update_index_via :: Repo -> [Streamer] -> IO ()
+update_index_via repo as = do
+ (p, h) <- hPipeTo "git" (toCommand $ Git.gitCommandLine params repo)
+ forM_ as (stream h)
+ hClose h
+ forceSuccess p
where
- togit ps content = pipeWrite (map Param ps) (L.pack content) repo
- >>= forceSuccess
+ params = map Param ["update-index", "-z", "--index-info"]
+ stream h a = a (streamer h)
+ streamer h s = do
+ hPutStr h s
+ hPutStr h "\0"
{- Generates a line suitable to be fed into update-index, to add
- a given file with a given sha. -}
update_index_line :: String -> FilePath -> String
update_index_line sha file = "100644 blob " ++ sha ++ "\t" ++ file
-{- Gets the contents of a tree in a format suitable for update_index. -}
-ls_tree :: String -> Repo -> IO [String]
-ls_tree x = pipeNullSplit params
+{- Gets the contents of a tree. -}
+ls_tree :: String -> Repo -> Streamer
+ls_tree x repo streamer = mapM_ streamer =<< pipeNullSplit params repo
where
params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
{- For merging two trees. -}
-merge_trees :: String -> String -> CatFileHandle -> Repo -> IO [String]
+merge_trees :: String -> String -> CatFileHandle -> Repo -> Streamer
merge_trees x y h = calc_merge h $ "diff-tree":diff_opts ++ [x, y]
{- For merging a single tree into the index. -}
-merge_tree_index :: String -> CatFileHandle -> Repo -> IO [String]
+merge_tree_index :: String -> CatFileHandle -> Repo -> Streamer
merge_tree_index x h = calc_merge h $ "diff-index":diff_opts ++ ["--cached", x]
diff_opts :: [String]
@@ -75,33 +88,20 @@ diff_opts = ["--raw", "-z", "-r", "--no-renames", "-l0"]
{- Calculates how to perform a merge, using git to get a raw diff,
- and passing each resulting line suitable for update_index to the streamer. -}
-calc_merge :: CatFileHandle -> [String] -> Repo -> IO [String]
-calc_merge h differ repo = do
- diff <- pipeNullSplit (map Param differ) repo
- l <- mapM (\p -> mergeFile p h repo) (pairs diff)
- return $ catMaybes l
+calc_merge :: CatFileHandle -> [String] -> Repo -> Streamer
+calc_merge ch differ repo streamer = gendiff >>= go
where
- pairs [] = []
- pairs (_:[]) = error "calc_merge parse error"
- pairs (a:b:rest) = (a,b):pairs rest
-
-{- Injects some content into git, returning its hash. -}
-hashObject :: L.ByteString -> Repo -> IO String
-hashObject content repo = getSha subcmd $ do
- (h, s) <- pipeWriteRead (map Param params) content repo
- L.length s `seq` do
- forceSuccess h
- reap -- XXX unsure why this is needed
- return $ L.unpack s
- where
- subcmd = "hash-object"
- params = [subcmd, "-w", "--stdin"]
+ gendiff = pipeNullSplit (map Param differ) repo
+ go [] = return ()
+ go (info:file:rest) = mergeFile info file ch repo >>=
+ maybe (go rest) (\l -> streamer l >> go rest)
+ go (_:[]) = error "calc_merge parse error"
{- Given an info line from a git raw diff, and the filename, generates
- a line suitable for update_index that union merges the two sides of the
- diff. -}
-mergeFile :: (String, FilePath) -> CatFileHandle -> Repo -> IO (Maybe String)
-mergeFile (info, file) h repo = case filter (/= nullsha) [asha, bsha] of
+mergeFile :: String -> FilePath -> CatFileHandle -> Repo -> IO (Maybe String)
+mergeFile info file h repo = case filter (/= nullsha) [asha, bsha] of
[] -> return Nothing
(sha:[]) -> return $ Just $ update_index_line sha file
shas -> do
@@ -112,3 +112,15 @@ mergeFile (info, file) h repo = case filter (/= nullsha) [asha, bsha] of
[_colonamode, _bmode, asha, bsha, _status] = words info
nullsha = replicate shaSize '0'
unionmerge = L.unlines . nub . L.lines
+
+{- Injects some content into git, returning its hash. -}
+hashObject :: L.ByteString -> Repo -> IO String
+hashObject content repo = getSha subcmd $ do
+ (h, s) <- pipeWriteRead (map Param params) content repo
+ L.length s `seq` do
+ forceSuccess h
+ reap -- XXX unsure why this is needed
+ return $ L.unpack s
+ where
+ subcmd = "hash-object"
+ params = [subcmd, "-w", "--stdin"]
diff --git a/Logs/Trust.hs b/Logs/Trust.hs
index cb91861fd..6305d281f 100644
--- a/Logs/Trust.hs
+++ b/Logs/Trust.hs
@@ -20,6 +20,7 @@ import Types.TrustLevel
import qualified Annex.Branch
import qualified Annex
import Logs.UUIDBased
+import Logs.UUID
{- Filename of trust.log. -}
trustLog :: FilePath
@@ -27,6 +28,10 @@ trustLog = "trust.log"
{- Returns a list of UUIDs at the specified trust level. -}
trustGet :: TrustLevel -> Annex [UUID]
+trustGet SemiTrusted = do -- special case; trustMap does not contain all these
+ others <- M.keys . M.filter (/= SemiTrusted) <$> trustMap
+ all <- uuidList
+ return $ all \\ others
trustGet level = M.keys . M.filter (== level) <$> trustMap
{- Read the trustLog into a map, overriding with any
diff --git a/Logs/UUID.hs b/Logs/UUID.hs
index 20f43d15c..17b0330c1 100644
--- a/Logs/UUID.hs
+++ b/Logs/UUID.hs
@@ -16,7 +16,8 @@
module Logs.UUID (
describeUUID,
recordUUID,
- uuidMap
+ uuidMap,
+ uuidList
) where
import qualified Data.Map as M
@@ -87,3 +88,6 @@ uuidMap = do
return $ M.insertWith' preferold u "" m
where
preferold = flip const
+
+uuidList :: Annex [UUID]
+uuidList = M.keys <$> uuidMap
diff --git a/debian/changelog b/debian/changelog
index c145b14f9..37578f597 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -13,6 +13,7 @@ git-annex (3.20111112) UNRELEASED; urgency=low
displayed)
* status: --fast is back
* Fix support for insteadOf url remapping. Closes: #644278
+ * merge: Now runs in constant space.
-- Joey Hess <joeyh@debian.org> Sat, 12 Nov 2011 14:50:21 -0400
diff --git a/doc/todo/optimise_git-annex_merge.mdwn b/doc/todo/optimise_git-annex_merge.mdwn
index 2df196cfd..91d18ebd7 100644
--- a/doc/todo/optimise_git-annex_merge.mdwn
+++ b/doc/todo/optimise_git-annex_merge.mdwn
@@ -15,3 +15,9 @@ merged. This could be reduced to a single call to `git-cat-file --batch`,
There is already a Git.CatFile library that can do this easily. --[[Joey]]
> This is now done, part above remains todo. --[[Joey]]
+
+---
+
+Merging used to use memory proportional to the size of the diff. It now
+streams data, running in constant space. This probably sped it up a lot,
+as there's much less allocation and GC action. --[[Joey]]