summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Joey Hess <joey@kitenet.net>, 2013-01-11 15:43:09 -0400
committer: Joey Hess <joey@kitenet.net>, 2013-01-11 16:03:45 -0400
commit: 5387b44657d01e436282e3ae6650a8f083b5ef00 (patch)
tree: c208fb8505e28d1998ea96a1cf63929e9053749d
parent: 4979c878da4dcacf699b4d0b2e2f173e0d0bd06e (diff)
safe recv-key in direct mode
Checks the key's size and checksum. This is sorta expensive, but it avoids needing to add another round-trip to the protocol.
-rw-r--r-- Command/RecvKey.hs 31
-rw-r--r-- Fields.hs 3
-rw-r--r-- GitAnnexShell.hs 1
-rw-r--r-- Remote/Git.hs 2
-rw-r--r-- debian/changelog 6
-rw-r--r-- doc/design/assistant/desymlink.mdwn 59
-rw-r--r-- doc/git-annex-shell.mdwn 3
7 files changed, 71 insertions, 34 deletions
diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs
index 62ab9a7ee..11a5fd5ca 100644
--- a/Command/RecvKey.hs
+++ b/Command/RecvKey.hs
@@ -14,6 +14,10 @@ import Annex.Content
import Utility.Rsync
import Logs.Transfer
import Command.SendKey (fieldTransfer)
+import qualified Fields
+import qualified Types.Key
+import qualified Types.Backend
+import qualified Backend
def :: [Command]
def = [noCommit $ command "recvkey" paramKey seek
@@ -26,7 +30,7 @@ start :: Key -> CommandStart
start key = ifM (inAnnex key)
( error "key is already present in annex"
, fieldTransfer Download key $ \_p -> do
- ifM (getViaTmp key $ liftIO . rsyncServerReceive)
+ ifM (getViaTmp key go)
( do
-- forcibly quit after receiving one key,
-- and shutdown cleanly
@@ -35,3 +39,28 @@ start key = ifM (inAnnex key)
, return False
)
)
+ where
+ go tmp = ifM (liftIO $ rsyncServerReceive tmp)
+ ( ifM (isJust <$> Fields.getField Fields.direct)
+ ( directcheck tmp
+ , return True
+ )
+ , return False
+ )
+ {- If the sending repository uses direct mode, the file
+ - it sends could be modified as it's sending it. So check
+ - that the right size file was received, and that the key/value
+ - Backend is happy with it. -}
+ directcheck tmp = do
+ oksize <- case Types.Key.keySize key of
+ Nothing -> return True
+ Just size -> do
+ size' <- fromIntegral . fileSize
+ <$> liftIO (getFileStatus tmp)
+ return $ size == size'
+ if oksize
+ then case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of
+ Nothing -> return False
+ Just backend -> maybe (return True) (\a -> a key tmp)
+ (Types.Backend.fsckKey backend)
+ else return False
diff --git a/Fields.hs b/Fields.hs
index 145a8adca..ffd273be6 100644
--- a/Fields.hs
+++ b/Fields.hs
@@ -30,3 +30,6 @@ associatedFile :: Field
associatedFile = Field "associatedfile" $ \f ->
-- is the file a safe relative filename?
not (isAbsolute f) && not ("../" `isPrefixOf` f)
+
+direct :: Field
+direct = Field "direct" $ \f -> f == "1"
diff --git a/GitAnnexShell.hs b/GitAnnexShell.hs
index f77347a1c..fca36cfc5 100644
--- a/GitAnnexShell.hs
+++ b/GitAnnexShell.hs
@@ -122,6 +122,7 @@ checkField :: (String, String) -> Bool
checkField (field, value)
| field == fieldName remoteUUID = fieldCheck remoteUUID value
| field == fieldName associatedFile = fieldCheck associatedFile value
+ | field == fieldName direct = fieldCheck direct value
| otherwise = False
failure :: IO ()
diff --git a/Remote/Git.hs b/Remote/Git.hs
index e8e1a1ba2..8c8d1274c 100644
--- a/Remote/Git.hs
+++ b/Remote/Git.hs
@@ -398,7 +398,9 @@ rsyncOrCopyFile rsyncparams src dest p =
rsyncParamsRemote :: Remote -> Direction -> Key -> FilePath -> AssociatedFile -> Annex [CommandParam]
rsyncParamsRemote r direction key file afile = do
u <- getUUID
+ direct <- isDirect
let fields = (Fields.remoteUUID, fromUUID u)
+ : (Fields.direct, if direct then "1" else "")
: maybe [] (\f -> [(Fields.associatedFile, f)]) afile
Just (shellcmd, shellparams) <- git_annex_shell (repo r)
(if direction == Download then "sendkey" else "recvkey")
diff --git a/debian/changelog b/debian/changelog
index 4230d12c4..4a4498880 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,8 +1,10 @@
git-annex (3.20130108) UNRELEASED; urgency=low
+ * Now handles the case where a file that's being transferred to a remote
+ is modified in place, which direct mode allows to happen. When this
+ happens, the transfer now fails, rather than allow possibly corrupt
+ data into the remote.
* fsck: Better checking of file content in direct mode.
- * Special remotes now all rollback storage of keys that get modified
- during the transfer, which can happen in direct mode.
* drop: Suggest using git annex move when numcopies prevents dropping a file.
* webapp: Repo switcher filters out repos that do not exist any more
(or are on a drive that's not mounted).
diff --git a/doc/design/assistant/desymlink.mdwn b/doc/design/assistant/desymlink.mdwn
index 7931e9e6b..64c56f466 100644
--- a/doc/design/assistant/desymlink.mdwn
+++ b/doc/design/assistant/desymlink.mdwn
@@ -84,6 +84,32 @@ is converted to a real file when it becomes present.
## TODO
+* kqueue does not deliver an event when an existing file is modified.
+ This doesn't affect OSX, which uses FSEvents now, but it makes direct
+ mode assistant not 100% on other BSD's.
+
+## done
+
+* `git annex sync` updates the key to files mappings for files changed,
+ but needs much other work to handle direct mode:
+ * Generate git commit, without running `git commit`, because it will
+ want to stage the full files. **done**
+ * Update location logs for any files deleted by a commit. **done**
+ * Generate a git merge, without running `git merge` (or possibly running
+ it in a scratch repo?), because it will stumble over the direct files.
+ **done**
+ * Drop contents of files deleted by a merge (including updating the
+ location log), or if we cannot drop,
+ move their contents to `.git/annex/objects/`. **no** .. instead,
+ avoid ever losing file contents in a direct mode merge. If the file is
+ deleted, its content is moved back to .git/annex/objects, if necessary.
+ * When a merge adds a symlink pointing at a key that is present in the
+ repo, replace the symlink with the direct file (either moving out
+ of `.git/annex/objects/` or hard-linking if the same key is present
+ elsewhere in the tree. **done**
+ * handle merge conflicts on direct mode files **done**
+* support direct mode in the assistant (many little fixes)
+
* Deal with files changing as they're being transferred from a direct mode
repository to another git repository. The remote repo currently will
accept the bad data and update the location log to say it has the key.
@@ -113,34 +139,7 @@ is converted to a real file when it becomes present.
the temp file, which is probably corrupt. (Could in future use it as a
basis for transferring the new key..) **done**
- For git remotes, add a flag to `git-annex-shell recvkey` (using a field
+ For git remotes, added a flag to `git-annex-shell recvkey` (using a field
after the "--" to remain back-compat). With this flag, after receiving
- the data, the remote should wait for a signal that the data is good
- before it updates the location log. The signal could just be a "1"
- sent over the ssh channel. Or another `git-annex-shell` command. **TODO**
-
-* kqueue does not deliver an event when an existing file is modified.
- This doesn't affect OSX, which uses FSEvents now, but it makes direct
- mode assistant not 100% on other BSD's.
-
-## done
-
-* `git annex sync` updates the key to files mappings for files changed,
- but needs much other work to handle direct mode:
- * Generate git commit, without running `git commit`, because it will
- want to stage the full files. **done**
- * Update location logs for any files deleted by a commit. **done**
- * Generate a git merge, without running `git merge` (or possibly running
- it in a scratch repo?), because it will stumble over the direct files.
- **done**
- * Drop contents of files deleted by a merge (including updating the
- location log), or if we cannot drop,
- move their contents to `.git/annex/objects/`. **no** .. instead,
- avoid ever losing file contents in a direct mode merge. If the file is
- deleted, its content is moved back to .git/annex/objects, if necessary.
- * When a merge adds a symlink pointing at a key that is present in the
- repo, replace the symlink with the direct file (either moving out
- of `.git/annex/objects/` or hard-linking if the same key is present
- elsewhere in the tree. **done**
- * handle merge conflicts on direct mode files **done**
-* support direct mode in the assistant (many little fixes)
+ the data, the remote fscks the data. This is not optimal, but avoids
+ needing another round-trip, or a protocol change.
diff --git a/doc/git-annex-shell.mdwn b/doc/git-annex-shell.mdwn
index 5fbc6de53..38659d0e2 100644
--- a/doc/git-annex-shell.mdwn
+++ b/doc/git-annex-shell.mdwn
@@ -76,7 +76,8 @@ to git-annex-shell are:
past versions of git-annex-shell (that ignore these, but would choke
on new dashed options).
- Currently used fields include remoteuuid= and associatedfile=
+ Currently used fields include remoteuuid=, associatedfile=,
+ and direct=
# HOOK