summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-06-20 19:44:45 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-06-20 21:37:18 -0400
commitc835166a7cebfa44d232bbed7c5b5e22bdfeb2bd (patch)
tree2c27b9abcd52f49a2ce31568a75bf86ffc0e1e2c
parent91e50782ce6d634ffc8c2f809c80b6d4ff94a5ca (diff)
add git-union-merge
This is a new git subcommand, that does a generic union merge operation between two refs, storing the result in a branch. It operates efficiently without touching the working tree. It does need to write out a temporary index file, and may need to write out some other temp files as well. This could be useful for anything that stores data in a branch, and needs to merge changes into that branch without actually checking the branch out. Since conflict handling can't be done without a working copy, the merge type is always a union merge, which is fine for data stored in log format (as git-annex does), or in non-conflicting files (as pristine-tar does). This probably belongs in git proper, but it will live in git-annex for now. --- Plan is to move .git-annex/ to a git-annex branch, and use git-union-merge to handle merging changes when pulling from remotes. Some preliminary benchmarking using real .git-annex/ data indicates that it's quite fast, except for the "git add" call, which is as slow as "git add" tends to be with a big index.
-rw-r--r--.gitignore2
-rw-r--r--GitRepo.hs3
-rw-r--r--Makefile6
-rw-r--r--debian/changelog2
-rw-r--r--doc/git-union-merge.mdwn38
-rw-r--r--doc/todo/branching.mdwn6
-rw-r--r--git-union-merge.hs120
7 files changed, 170 insertions, 7 deletions
diff --git a/.gitignore b/.gitignore
index b73167c92..9a4bc80de 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,8 +5,10 @@ configure
SysConfig.hs
git-annex
git-annex-shell
+git-union-merge
git-annex.1
git-annex-shell.1
+git-union-merge.1
doc/.ikiwiki
html
*.tix
diff --git a/GitRepo.hs b/GitRepo.hs
index 24bc9b5c2..0bee2842a 100644
--- a/GitRepo.hs
+++ b/GitRepo.hs
@@ -38,6 +38,7 @@ module GitRepo (
gitCommandLine,
run,
pipeRead,
+ pipeNullSplit,
attributes,
remotes,
remotesAdd,
@@ -412,7 +413,7 @@ typeChangedFiles' repo l middle = pipeNullSplit repo $ start ++ middle ++ end
end = [Param "--"] ++ map File l
{- Reads null terminated output of a git command (as enabled by the -z
- - parameter), and splits it into a list of files. -}
+ - parameter), and splits it into a list of files/lines/whatever. -}
pipeNullSplit :: Repo -> [CommandParam] -> IO [FilePath]
pipeNullSplit repo params = do
fs0 <- pipeRead repo params
diff --git a/Makefile b/Makefile
index 286c3a6e5..915b0bf0b 100644
--- a/Makefile
+++ b/Makefile
@@ -6,8 +6,8 @@ GHCFLAGS=-prof -auto-all -rtsopts -caf-all -fforce-recomp $(IGNORE)
endif
GHCMAKE=ghc $(GHCFLAGS) --make
-bins=git-annex git-annex-shell
-mans=git-annex.1 git-annex-shell.1
+bins=git-annex git-annex-shell git-union-merge
+mans=git-annex.1 git-annex-shell.1 git-union-merge.1
all: $(bins) $(mans) docs
@@ -33,6 +33,8 @@ git-annex.1: doc/git-annex.mdwn
./mdwn2man git-annex 1 doc/git-annex.mdwn > git-annex.1
git-annex-shell.1: doc/git-annex-shell.mdwn
./mdwn2man git-annex-shell 1 doc/git-annex-shell.mdwn > git-annex-shell.1
+git-union-merge.1: doc/git-union-merge.mdwn
+ ./mdwn2man git-union-merge 1 doc/git-union-merge.mdwn > git-union-merge.1
install: all
install -d $(DESTDIR)$(PREFIX)/bin
diff --git a/debian/changelog b/debian/changelog
index de012de5b..b96b9f43d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -6,6 +6,8 @@ git-annex (0.20110611) UNRELEASED; urgency=low
such as btrfs.
* Allow --trust etc to specify a repository by name, for temporarily
trusting repositories that are not configured remotes.
+ * git-union-merge: New git subcommand, that does a generic union merge
+ operation, and operates efficiently without touching the working tree.
-- Joey Hess <joeyh@debian.org> Mon, 13 Jun 2011 19:53:24 -0400
diff --git a/doc/git-union-merge.mdwn b/doc/git-union-merge.mdwn
new file mode 100644
index 000000000..ac8e8f7a9
--- /dev/null
+++ b/doc/git-union-merge.mdwn
@@ -0,0 +1,38 @@
+# NAME
+
+git-union-merge - Join branches together using a union merge
+
+# SYNOPSIS
+
+git union-merge branch ref ref
+
+# DESCRIPTION
+
+Does a union merge between two refs, storing the result in the
+specified branch.
+
+The union merge will always succeed, but assumes that files can be merged
+simply by concatenating together lines from both refs, in any order.
+So, this is useful only for branches containing log-type data.
+
+Note that this does not touch the checked out working copy. It operates
+entirely on git refs and branches.
+
+# EXAMPLE
+
+ git union-merge git-annex git-annex origin/git-annex
+
+Merges the current git-annex branch, and a version from origin,
+storing the result in the git-annex branch.
+
+# BUGS
+
+File modes are not currently merged.
+
+# AUTHOR
+
+Joey Hess <joey@kitenet.net>
+
+<http://git-annex.branchable.com/>
+
+Warning: this page is automatically made into a man page via [mdwn2man](http://git.ikiwiki.info/?p=ikiwiki;a=blob;f=mdwn2man;hb=HEAD). Edit with care.
diff --git a/doc/todo/branching.mdwn b/doc/todo/branching.mdwn
index 9c44c03fe..37e7b6edd 100644
--- a/doc/todo/branching.mdwn
+++ b/doc/todo/branching.mdwn
@@ -148,10 +148,8 @@ problem generically. Something like this:
* For remotes, there are also `origin/B`, `otherremote/B`, etc.
* To merge two branches `B` and `foo/B`, construct a merge commit that
makes each file have all lines that were in either version of the file,
- with duplicates removed (probably). Do this without checking out a tree,
- or using a temporary directory. (One easy but expensive way is to just
- check out the branch to a temp dir, union merge into it, and remove the
- temp dir ... but it should be possible to do it without using a temp dir.)
+ with duplicates removed (probably). Do this without checking out a tree.
+ -- now implemented as git-union-merge
* As a `post-merge` hook, merge `*/B` into `B`. This will ensure `B`
is always up-to-date after a pull from a remote.
* When pushing to a remote, nothing need to be done, except ensure
diff --git a/git-union-merge.hs b/git-union-merge.hs
new file mode 100644
index 000000000..482f66daa
--- /dev/null
+++ b/git-union-merge.hs
@@ -0,0 +1,120 @@
+{- git-union-merge program
+ -
+ - Copyright 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+import System.Environment
+import System.Exit (ExitCode(..))
+import System.FilePath
+import System.Directory
+import System.Cmd
+import System.Cmd.Utils
+import System.Posix.Env (setEnv)
+import System.Posix.Directory (changeWorkingDirectory)
+import Control.Exception (finally)
+import Control.Monad (when, unless)
+import Data.List
+
+import qualified GitRepo as Git
+import Utility
+
+{- Usage message, shown when the command line cannot be parsed. -}
+header :: String
+header = "Usage: git-union-merge branch ref ref"
+
+{- Raises an error carrying the usage message. Being of type IO a, it
+ - can stand in wherever an IO action of any result type is expected. -}
+usage :: IO a
+usage = error (concat ["bad parameters\n\n", header])
+
+{- Entry point: parses the three parameters, sets up the temporary work
+ - tree and index, stages the union of both refs, and commits the result
+ - to the branch.
+ -
+ - The temporary files are removed even when staging or committing
+ - fails, so a failed run does not leave them behind. -}
+main :: IO ()
+main = do
+    -- parseArgs only ever returns a list of exactly three parameters.
+    [branch, aref, bref] <- parseArgs
+    g <- setup
+    (stage g aref bref >> commit g branch aref bref) `finally` cleanup g
+
+{- Reads the command line, which must consist of exactly three
+ - parameters; anything else ends in the usage error. -}
+parseArgs :: IO [String]
+parseArgs = getArgs >>= check
+  where
+    check params@[_, _, _] = return params
+    check _ = usage
+
+{- Location of the scratch directory, below the "tmp" directory of the
+ - repository's git directory. -}
+tmpDir :: Git.Repo -> FilePath
+tmpDir g = top </> dotgit </> "tmp" </> "git-union-merge"
+  where
+    top = Git.workTree g
+    dotgit = Git.gitDir g
+
+{- Location of the scratch index file, below the "tmp" directory of the
+ - repository's git directory. -}
+tmpIndex :: Git.Repo -> FilePath
+tmpIndex g = top </> dotgit </> "tmp" </> "git-union-merge.index"
+  where
+    top = Git.workTree g
+    dotgit = Git.gitDir g
+
+{- Switches the process over to a scratch work tree: the temporary
+ - directory becomes both the current directory and git's work tree,
+ - and git is pointed at a temporary index file. Returns the repository
+ - found from the directory the program was started in. -}
+setup :: IO Git.Repo
+setup = do
+    repo <- Git.repoFromCwd >>= Git.configRead
+    -- Clear out anything a previous run left behind (idempotency).
+    cleanup repo
+    let workdir = tmpDir repo
+    createDirectoryIfMissing True workdir
+    changeWorkingDirectory workdir
+    -- Note that due to these variables being set, Git.run and
+    -- similar helpers cannot be used, as they override the work tree.
+    -- It is only safe to use Git.run etc when doing things that do
+    -- not operate on the work tree.
+    mapM_ export
+        [ ("GIT_WORK_TREE", workdir)
+        , ("GIT_INDEX_FILE", tmpIndex repo)
+        ]
+    return repo
+  where
+    -- True asks setEnv to replace a value that is already set.
+    export (name, value) = setEnv name value True
+
+{- Removes the temporary directory and the temporary index file, when
+ - they exist. Does nothing for whichever of them is absent. -}
+cleanup :: Git.Repo -> IO ()
+cleanup g = do
+    haveDir <- doesDirectoryExist dir
+    when haveDir $ removeDirectoryRecursive dir
+    haveIndex <- doesFileExist index
+    when haveIndex $ removeFile index
+  where
+    dir = tmpDir g
+    index = tmpIndex g
+
+{- Stages the content of both refs into the index.
+ -
+ - The index is first filled with the tree of aref. Every file that
+ - diff-tree reports as differing in bref is then written into the
+ - current directory, holding the unique lines of both versions, and
+ - added to the index.
+ -
+ - The git commands started directly here inherit the environment and
+ - current directory of this process, so they use whatever work tree and
+ - index file have been configured there.
+ -
+ - Raises an error if a git command fails or if the diff-tree output
+ - cannot be parsed. -}
+stage :: Git.Repo -> String -> String -> IO ()
+stage g aref bref = do
+    -- Populate the index with the contents of aref, as a starting point.
+    -- The two git processes are connected directly rather than through
+    -- a shell pipeline, so a ref containing shell metacharacters is
+    -- passed to git as a plain parameter and cannot run anything.
+    (lstree, listing) <- pipeFrom "git"
+        ["ls-tree", "-r", "--full-name", "--full-tree", aref]
+    pipeTo "git" ["update-index", "--index-info"] listing
+    -- The exit status of ls-tree may only be checked once all of its
+    -- output has been consumed, which pipeTo has done by now.
+    forceSuccess lstree
+    -- Identify files that are different in bref, and stage merged files.
+    diff <- Git.pipeNullSplit g $ map Param
+        ["diff-tree", "--raw", "-z", "--no-renames", "-l0", aref, bref]
+    mapM_ genfile (pairs diff)
+    added <- system "git add ."
+    unless (added == ExitSuccess) $ error "git add failed"
+  where
+    -- With -z, diff-tree emits an info field followed by a file name
+    -- for every change; group the fields two by two.
+    pairs (info:file:rest) = (info, file) : pairs rest
+    pairs [] = []
+    pairs [_] = error "parse error"
+
+    nullsha = replicate 40 '0'
+
+    genfile (info, file) = case words info of
+        [_colonamode, _bmode, asha, bsha, _status]
+            -- No content in bref: keep what was staged from aref.
+            | bsha == nullsha -> return ()
+            | asha == nullsha -> merge file [bsha]
+            | otherwise -> merge file [asha, bsha]
+        _ -> error $ "unable to parse diff-tree output: " ++ info
+
+    merge file shas = do
+        -- Each blob is read separately, so a blob that lacks a final
+        -- newline cannot have its last line joined to the first line
+        -- of the next blob.
+        contents <- mapM (\sha -> Git.pipeRead g [Param "show", Param sha]) shas
+        -- The file may live in a subdirectory that does not exist yet
+        -- in the directory being written to.
+        createDirectoryIfMissing True (takeDirectory file)
+        writeFile file $ unlines $ nub $ concatMap lines contents
+
+{- Commits the index into the specified branch.
+ -
+ - Writes the index out as a tree, creates a commit of that tree with
+ - aref and bref as its parents and "union merge" as its message, and
+ - points refs/heads/<branch> at the new commit.
+ -
+ - Raises an error if git produces no sha or exits unsuccessfully. -}
+commit :: Git.Repo -> String -> String -> String -> IO ()
+commit g branch aref bref = do
+    tree <- getsha $
+        pipeFrom "git" ["write-tree"]
+    sha <- getsha $
+        pipeBoth "git" ["commit-tree", tree, "-p", aref, "-p", bref]
+            "union merge"
+    Git.run g "update-ref" [Param $ "refs/heads/" ++ branch, Param sha]
+  where
+    -- Runs a piped git command and returns its output, less one
+    -- trailing newline.
+    getsha a = do
+        (h, out) <- a
+        -- Pattern matching copes with empty output, which then reaches
+        -- the check below instead of crashing a partial list function.
+        let sha = case reverse out of
+                ('\n':rest) -> reverse rest
+                _ -> out
+        when (null sha) $ error "failed to read sha from git"
+        -- The check above has consumed all of the output, so the exit
+        -- status of the command can be collected now.
+        forceSuccess h
+        return sha