summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2012-06-06 02:16:21 -0400
committerGravatar Joey Hess <joey@kitenet.net>2012-06-06 02:16:21 -0400
commit27cfeca4ea8f2aa326e7d8416401c319133491db (patch)
tree7b2ddb41cc6b543b290aa24c5eecdf42d96c97f5
parenta7a729bce4db901a1142b5ef7ab8cab0d1311a66 (diff)
parentf1bd72ea546be705334ba8f6d01d9dcfb0c33cf9 (diff)
Merge branch 'master' into watch
-rw-r--r--Annex/Branch.hs9
-rw-r--r--Git/UnionMerge.hs41
-rw-r--r--Git/UpdateIndex.hs49
-rw-r--r--doc/design/assistant/blog/day_2__races.mdwn45
-rw-r--r--doc/design/assistant/inotify.mdwn17
5 files changed, 113 insertions, 48 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index 706522f3b..c8d0719b0 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -33,6 +33,7 @@ import qualified Git.Command
import qualified Git.Ref
import qualified Git.Branch
import qualified Git.UnionMerge
+import qualified Git.UpdateIndex
import Git.HashObject
import qualified Git.Index
import Annex.CatFile
@@ -258,8 +259,8 @@ files = withIndexUpdate $ do
- in changes from other branches.
-}
genIndex :: Git.Repo -> IO ()
-genIndex g = Git.UnionMerge.stream_update_index g
- [Git.UnionMerge.ls_tree fullname g]
+genIndex g = Git.UpdateIndex.stream_update_index g
+ [Git.UpdateIndex.ls_tree fullname g]
{- Merges the specified refs into the index.
- Any changes staged in the index will be preserved. -}
@@ -335,13 +336,13 @@ stageJournal = do
g <- gitRepo
withIndex $ liftIO $ do
h <- hashObjectStart g
- Git.UnionMerge.stream_update_index g
+ Git.UpdateIndex.stream_update_index g
[genstream (gitAnnexJournalDir g) h fs]
hashObjectStop h
where
genstream dir h fs streamer = forM_ fs $ \file -> do
let path = dir </> file
sha <- hashFile h path
- _ <- streamer $ Git.UnionMerge.update_index_line
+ _ <- streamer $ Git.UpdateIndex.update_index_line
sha (fileJournal file)
removeFile path
diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs
index d68bb61ab..9ff820dc9 100644
--- a/Git/UnionMerge.hs
+++ b/Git/UnionMerge.hs
@@ -7,11 +7,7 @@
module Git.UnionMerge (
merge,
- merge_index,
- update_index,
- stream_update_index,
- update_index_line,
- ls_tree
+ merge_index
) where
import System.Cmd.Utils
@@ -24,8 +20,7 @@ import Git
import Git.Sha
import Git.CatFile
import Git.Command
-
-type Streamer = (String -> IO ()) -> IO ()
+import Git.UpdateIndex
{- Performs a union merge between two branches, staging it in the index.
- Any previously staged changes in the index will be lost.
@@ -47,38 +42,6 @@ merge_index :: CatFileHandle -> Repo -> [Ref] -> IO ()
merge_index h repo bs =
stream_update_index repo $ map (\b -> merge_tree_index b h repo) bs
-{- Feeds content into update-index. Later items in the list can override
- - earlier ones, so the list can be generated from any combination of
- - ls_tree, merge_trees, and merge_tree_index. -}
-update_index :: Repo -> [String] -> IO ()
-update_index repo ls = stream_update_index repo [(`mapM_` ls)]
-
-{- Streams content into update-index. -}
-stream_update_index :: Repo -> [Streamer] -> IO ()
-stream_update_index repo as = do
- (p, h) <- hPipeTo "git" (toCommand $ gitCommandLine params repo)
- fileEncoding h
- forM_ as (stream h)
- hClose h
- forceSuccess p
- where
- params = map Param ["update-index", "-z", "--index-info"]
- stream h a = a (streamer h)
- streamer h s = do
- hPutStr h s
- hPutStr h "\0"
-
-{- Generates a line suitable to be fed into update-index, to add
- - a given file with a given sha. -}
-update_index_line :: Sha -> FilePath -> String
-update_index_line sha file = "100644 blob " ++ show sha ++ "\t" ++ file
-
-{- Gets the current tree for a ref. -}
-ls_tree :: Ref -> Repo -> Streamer
-ls_tree (Ref x) repo streamer = mapM_ streamer =<< pipeNullSplit params repo
- where
- params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
-
{- For merging two trees. -}
merge_trees :: Ref -> Ref -> CatFileHandle -> Repo -> Streamer
merge_trees (Ref x) (Ref y) h = calc_merge h $ "diff-tree":diff_opts ++ [x, y]
diff --git a/Git/UpdateIndex.hs b/Git/UpdateIndex.hs
new file mode 100644
index 000000000..04bc4da5b
--- /dev/null
+++ b/Git/UpdateIndex.hs
@@ -0,0 +1,49 @@
+{- git-update-index library
+ -
+ - Copyright 2011, 2012 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Git.UpdateIndex (
+ Streamer,
+ stream_update_index,
+ update_index_line,
+ ls_tree
+) where
+
+import System.Cmd.Utils
+
+import Common
+import Git
+import Git.Command
+
+{- Streamers are passed a callback and should feed it lines in the form
+ - read by update-index, and generated by ls-tree. -}
+type Streamer = (String -> IO ()) -> IO ()
+
+{- Streams content into update-index from a list of Streamers. -}
+stream_update_index :: Repo -> [Streamer] -> IO ()
+stream_update_index repo as = do
+ (p, h) <- hPipeTo "git" (toCommand $ gitCommandLine params repo)
+ fileEncoding h
+ forM_ as (stream h)
+ hClose h
+ forceSuccess p
+ where
+ params = map Param ["update-index", "-z", "--index-info"]
+ stream h a = a (streamer h)
+ streamer h s = do
+ hPutStr h s
+ hPutStr h "\0"
+
+{- Generates a line suitable to be fed into update-index, to add
+ - a given file with a given sha. -}
+update_index_line :: Sha -> FilePath -> String
+update_index_line sha file = "100644 blob " ++ show sha ++ "\t" ++ file
+
+{- Gets the current tree for a ref. -}
+ls_tree :: Ref -> Repo -> Streamer
+ls_tree (Ref x) repo streamer = mapM_ streamer =<< pipeNullSplit params repo
+ where
+ params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
diff --git a/doc/design/assistant/blog/day_2__races.mdwn b/doc/design/assistant/blog/day_2__races.mdwn
new file mode 100644
index 000000000..fadedb5fb
--- /dev/null
+++ b/doc/design/assistant/blog/day_2__races.mdwn
@@ -0,0 +1,45 @@
+Last night I got `git annex watch` to also handle deletion of files.
+This was not as tricky as feared; the key is using `git rm --ignore-unmatch`,
+which avoids most problematic situations (such as a just deleted file
+being added back before git is run).
+
+Also fixed some races when `git annex watch` is doing its startup scan of
+the tree, which might be changed as it's being traversed. Now only one
+thread performs actions at a time, so inotify events are queued up during
+the scan, and dealt with once it completes. It's worth noting that inotify
+can only buffer so many events, which might have been a problem except
+for a very nice feature of Haskell's inotify interface: It has a thread
+that drains the limited inotify buffer and does its own buffering.
+
+----
+
+Right now, `git annex watch` is not as fast as it could be when doing
+something like adding a lot of files, or deleting a lot of files.
+For each file, it currently runs a git command that updates the index.
+I did some work toward coalescing these into one command (which `git annex`
+already does normally). It's not quite ready to be turned on yet,
+because of some races involving `git add` that become much worse
+if it's delayed by event coalescing.
+
+----
+
+And races were the theme of today. Spent most of the day really
+getting to grips with all the fun races that can occur between
+modification happening to files, and `git annex watch`. The [[inotify]]
+page now has a long list of known races, some benign, and several,
+all involving adding files, that are quite nasty.
+
+I fixed one of those races this evening. The rest will probably involve
+moving away from using `git add`, which necessarily examines the file
+on disk, to directly shoving the symlink into git's index.
+
+BTW, it turns out that `dvcs-autosync` has grappled with some of these same
+races: <http://comments.gmane.org/gmane.comp.version-control.home-dir/665>
+I hope that `git annex watch` will be in a better place to deal with them,
+since it's only dealing with git, and with a restricted portion of it
+relevant to git-annex.
+
+It's important that `git annex watch` be rock solid. It's the foundation
+of the git annex assistant. Users should not need to worry about races
+when using it. Most users won't know what race conditions are. If only I
+could be so lucky!
diff --git a/doc/design/assistant/inotify.mdwn b/doc/design/assistant/inotify.mdwn
index ca63a1c82..e7c61c68b 100644
--- a/doc/design/assistant/inotify.mdwn
+++ b/doc/design/assistant/inotify.mdwn
@@ -58,12 +58,19 @@ Many races need to be dealt with by this code. Here are some of them.
* File is added and then replaced with another file before the annex add
moves its content into the annex.
- **Currently unfixed**; The new content will be moved to the annex under the
- old checksum, and fsck will later catch this inconsistency.
+ Fixed this problem; Now it hard links the file to a temp directory and
+ operates on the hard link, which is also made unwritable.
- Possible fix: Move content someplace before doing checksumming. Perhaps
- using a hard link and removing the write bit to prevent modification
- while checksumming.
+* A process has a file open for write, another one closes it, and so it's
+ added. Then the first process modifies it.
+
+ **Currently unfixed**; This changes content in the annex, and fsck will
+ later catch the inconsistency.
+
+ Possible fixes: Somehow track or detect if a file is open for write
+ by any processes. Or, when possible, making a copy-on-write copy
+ before adding the file would avoid this. Or, as a last resort, make
+ an expensive copy of the file and add that.
* File is added and then replaced with another file before the annex add
makes its symlink.