summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2012-06-06 14:29:10 -0400
committerGravatar Joey Hess <joey@kitenet.net>2012-06-06 14:29:10 -0400
commitb819f644ad00c5ad13ba5b249d3e127fd59d8694 (patch)
tree6882fedee5af6ee4406ef68b9ec1ec50822f64e9
parent91db54076964979b6c50bd7efd0b895c4d416978 (diff)
close the git add race
There's a race adding a new file to the annex: The file is moved to the annex and replaced with a symlink, and then we git add the symlink. If someone comes along in the meantime and replaces the symlink with something else, such as a new large file, we add that instead. Which could be bad.. This race is fixed by avoiding using git add, instead the symlink is directly staged into the index. It would be nice to make `git annex add` use this same technique. I have not done so yet because it currently runs git update-index once per file, which would slow does `git annex add`. A future enhancement would be to extend the Git.Queue to include the ability to run update-index with a list of Streamers.
-rw-r--r--Annex/Branch.hs4
-rw-r--r--Command/Add.hs9
-rw-r--r--Command/Watch.hs54
3 files changed, 47 insertions, 20 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index c8d0719b0..1dacd5f32 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -35,6 +35,8 @@ import qualified Git.Branch
import qualified Git.UnionMerge
import qualified Git.UpdateIndex
import Git.HashObject
+import Git.Types
+import Git.FilePath
import qualified Git.Index
import Annex.CatFile
import Annex.Perms
@@ -344,5 +346,5 @@ stageJournal = do
let path = dir </> file
sha <- hashFile h path
_ <- streamer $ Git.UpdateIndex.update_index_line
- sha (fileJournal file)
+ sha FileBlob (asTopFilePath $ fileJournal file)
removeFile path
diff --git a/Command/Add.hs b/Command/Add.hs
index d83817d72..ea0f85033 100644
--- a/Command/Add.hs
+++ b/Command/Add.hs
@@ -94,8 +94,9 @@ undo file key e = do
src <- inRepo $ gitAnnexLocation key
liftIO $ moveFile src file
-{- Creates the symlink to the annexed content. -}
-link :: FilePath -> Key -> Bool -> Annex ()
+{- Creates the symlink to the annexed content, and also returns the link's
+ - text. -}
+link :: FilePath -> Key -> Bool -> Annex FilePath
link file key hascontent = handle (undo file key) $ do
l <- calcGitLink file key
liftIO $ createSymbolicLink l file
@@ -109,11 +110,13 @@ link file key hascontent = handle (undo file key) $ do
mtime <- modificationTime <$> getFileStatus file
touch file (TimeSpec mtime) False
+ return l
+
{- Note: Several other commands call this, and expect it to
- create the symlink and add it. -}
cleanup :: FilePath -> Key -> Bool -> CommandCleanup
cleanup file key hascontent = do
- link file key hascontent
+ _ <- link file key hascontent
params <- ifM (Annex.getState Annex.force)
( return [Param "-f"]
, return []
diff --git a/Command/Watch.hs b/Command/Watch.hs
index 15c862bec..7b714ac18 100644
--- a/Command/Watch.hs
+++ b/Command/Watch.hs
@@ -14,9 +14,13 @@ import Command
import Utility.Inotify
import Utility.ThreadLock
import qualified Annex
-import qualified Annex.Queue
-import qualified Command.Add as Add
+import qualified Command.Add
+import qualified Git
import qualified Git.Command
+import qualified Git.UpdateIndex
+import Git.HashObject
+import Git.Types
+import Git.FilePath
import qualified Backend
import Annex.Content
@@ -39,14 +43,14 @@ start = notBareRepo $ do
mvar <- liftIO $ newMVar state
next $ next $ liftIO $ withINotify $ \i -> do
let hook a = Just $ runAnnex mvar a
- watchDir i "." (not . gitdir)
+ watchDir i "." (not . pruned)
(hook onAdd) (hook onAddSymlink)
(hook onDel) (hook onDelDir)
putStrLn "(started)"
waitForTermination
return True
where
- gitdir dir = takeFileName dir /= ".git"
+ pruned dir = takeFileName dir /= ".git"
{- Runs a handler, inside the Annex monad.
-
@@ -66,15 +70,20 @@ runAnnex mvar a f = do
where
go state = Annex.exec state $ a f
-{- Adding a file is the same as git-annex add.
- - The git queue is immediately flushed, so the file is added to git
- - now, rather than later (when it may have been already moved or deleted!) -}
+{- Adding a file is tricky; the file has to be replaced with a symlink
+ - but this is race prone, as the symlink could be changed immediately
+ - after creation. To avoid that race, git add is not used to stage the
+ - symlink. -}
onAdd :: FilePath -> Annex ()
onAdd file = do
- void $ doCommand $ do
- showStart "add" file
- next $ Add.perform file
- Annex.Queue.flush
+ showStart "add" file
+ Command.Add.ingest file >>= go
+ where
+ go Nothing = showEndFail
+ go (Just key) = do
+ link <- Command.Add.link file key True
+ inRepo $ stageSymlink file link
+ showEndOk
{- A symlink might be an arbitrary symlink, which is just added.
- Or, if it is a git-annex symlink, ensure it points to the content
@@ -83,19 +92,20 @@ onAdd file = do
onAddSymlink :: FilePath -> Annex ()
onAddSymlink file = go =<< Backend.lookupFile file
where
- go Nothing = addlink
+ go Nothing = addlink =<< liftIO (readSymbolicLink file)
go (Just (key, _)) = do
link <- calcGitLink file key
ifM ((==) link <$> liftIO (readSymbolicLink file))
- ( addlink
+ ( addlink link
, do
liftIO $ removeFile file
liftIO $ createSymbolicLink link file
- addlink
+ addlink link
)
- addlink = inRepo $ Git.Command.run "add"
- [Params "--force --", File file]
+ addlink link = inRepo $ stageSymlink file link
+{- The file could reappear at any time, so --cached is used, to only delete
+ - it from the index. -}
onDel :: FilePath -> Annex ()
onDel file = inRepo $ Git.Command.run "rm"
[Params "--quiet --cached --ignore-unmatch --", File file]
@@ -105,3 +115,15 @@ onDel file = inRepo $ Git.Command.run "rm"
onDelDir :: FilePath -> Annex ()
onDelDir dir = inRepo $ Git.Command.run "rm"
[Params "--quiet -r --cached --ignore-unmatch --", File dir]
+
+{- Adds a symlink to the index, without ever accessing the actual symlink
+ - on disk. -}
+stageSymlink :: FilePath -> String -> Git.Repo -> IO ()
+stageSymlink file linktext repo = Git.UpdateIndex.stream_update_index repo [stage]
+ where
+ stage streamer = do
+ line <- Git.UpdateIndex.update_index_line
+ <$> (hashObject repo BlobObject linktext)
+ <*> pure SymlinkBlob
+ <*> toTopFilePath file repo
+ streamer line