summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-06-13 20:23:47 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-06-13 20:33:52 -0400
commit30d7cce7ecb7dbcbabe83ab3f6cd5f32ce685992 (patch)
tree9d02220fe41dbdd8c7e62812fda0fcf2fda6b022
parenteb5fbc4cb97b73ad5d5d194600cc52c99edc89a0 (diff)
rsync is now used when copying files from repos on other filesystems
cp is still used when copying file from repos on the same filesystem, since --reflink=auto can make it significantly faster on filesystems such as btrfs. Directory special remotes still use cp, not rsync. It's not clear what tmp file should be used when rsyncing to such a remote.
-rw-r--r--Remote/Git.hs49
-rw-r--r--debian/changelog9
-rw-r--r--doc/todo/file_copy_progress_bar.mdwn2
3 files changed, 44 insertions, 16 deletions
diff --git a/Remote/Git.hs b/Remote/Git.hs
index 67d49df7d..0d8e2425a 100644
--- a/Remote/Git.hs
+++ b/Remote/Git.hs
@@ -11,6 +11,7 @@ import Control.Exception.Extensible
import Control.Monad.State (liftIO)
import qualified Data.Map as M
import System.Cmd.Utils
+import System.Posix.Files
import Types
import Types.Remote
@@ -130,10 +131,10 @@ dropKey r key =
{- Tries to copy a key's content from a remote's annex to a file. -}
copyFromRemote :: Git.Repo -> Key -> FilePath -> Annex Bool
copyFromRemote r key file
- | not $ Git.repoIsUrl r = liftIO $ copyFile (gitAnnexLocation r key) file
- | Git.repoIsSsh r = rsynchelper r True key file
+ | not $ Git.repoIsUrl r = rsyncOrCopyFile r (gitAnnexLocation r key) file
+ | Git.repoIsSsh r = rsyncHelper =<< rsyncParamsRemote r True key file
| otherwise = error "copying from non-ssh repo not supported"
-
+
{- Tries to copy a key's content to a remote's annex. -}
copyToRemote :: Git.Repo -> Key -> Annex Bool
copyToRemote r key
@@ -145,19 +146,18 @@ copyToRemote r key
a <- Annex.new r []
Annex.eval a $ do
ok <- Content.getViaTmp key $
- \f -> liftIO $ copyFile keysrc f
+ rsyncOrCopyFile r keysrc
AnnexQueue.flush True
return ok
| Git.repoIsSsh r = do
g <- Annex.gitRepo
let keysrc = gitAnnexLocation g key
- rsynchelper r False key keysrc
+ rsyncHelper =<< rsyncParamsRemote r False key keysrc
| otherwise = error "copying to non-ssh repo not supported"
-rsynchelper :: Git.Repo -> Bool -> Key -> FilePath -> Annex (Bool)
-rsynchelper r sending key file = do
+rsyncHelper :: [CommandParam] -> Annex (Bool)
+rsyncHelper p = do
showProgress -- make way for progress bar
- p <- rsyncParams r sending key file
res <- liftIO $ rsync p
if res
then return res
@@ -165,10 +165,22 @@ rsynchelper r sending key file = do
showLongNote "rsync failed -- run git annex again to resume file transfer"
return res
+{- Copys a file with rsync unless both locations are on the same
+ - filesystem. Then cp could be faster. -}
+rsyncOrCopyFile :: Git.Repo -> FilePath -> FilePath -> Annex Bool
+rsyncOrCopyFile r src dest = do
+ ss <- liftIO $ getFileStatus src
+ ds <- liftIO $ getFileStatus dest
+ if deviceID ss == deviceID ds
+ then liftIO $ copyFile src dest
+ else do
+ params <- rsyncParams r
+ rsyncHelper $ params ++ [Param src, Param dest]
+
{- Generates rsync parameters that ssh to the remote and asks it
- to either receive or send the key's content. -}
-rsyncParams :: Git.Repo -> Bool -> Key -> FilePath -> Annex [CommandParam]
-rsyncParams r sending key file = do
+rsyncParamsRemote :: Git.Repo -> Bool -> Key -> FilePath -> Annex [CommandParam]
+rsyncParamsRemote r sending key file = do
Just (shellcmd, shellparams) <- git_annex_shell r
(if sending then "sendkey" else "recvkey")
[ Param $ show key
@@ -179,15 +191,20 @@ rsyncParams r sending key file = do
]
-- Convert the ssh command into rsync command line.
let eparam = rsyncShell (Param shellcmd:shellparams)
- o <- getConfig r "rsync-options" ""
- let base = options ++ map Param (words o) ++ eparam
+ o <- rsyncParams r
if sending
- then return $ base ++ [dummy, File file]
- else return $ base ++ [File file, dummy]
+ then return $ o ++ eparam ++ [dummy, File file]
+ else return $ o ++ eparam ++ [File file, dummy]
where
- -- inplace makes rsync resume partial files
- options = [Params "-p --progress --inplace"]
-- the rsync shell parameter controls where rsync
-- goes, so the source/dest parameter can be a dummy value,
-- that just enables remote rsync mode.
dummy = Param ":"
+
+rsyncParams :: Git.Repo -> Annex [CommandParam]
+rsyncParams r = do
+ o <- getConfig r "rsync-options" ""
+ return $ options ++ map Param (words o)
+ where
+ -- --inplace to resume partial files
+ options = [Params "-p --progress --inplace"]
diff --git a/debian/changelog b/debian/changelog
index 09655df65..0837a9f34 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,12 @@
+git-annex (0.20110611) UNRELEASED; urgency=low
+
+ * rsync is now used when copying files from repos on other filesystems.
+ cp is still used when copying file from repos on the same filesystem,
+ since --reflink=auto can make it significantly faster on filesystems
+ such as btrfs.
+
+ -- Joey Hess <joeyh@debian.org> Mon, 13 Jun 2011 19:53:24 -0400
+
git-annex (0.20110610) unstable; urgency=low
* Add --numcopies option.
diff --git a/doc/todo/file_copy_progress_bar.mdwn b/doc/todo/file_copy_progress_bar.mdwn
index cd4ea33b7..847c1d1eb 100644
--- a/doc/todo/file_copy_progress_bar.mdwn
+++ b/doc/todo/file_copy_progress_bar.mdwn
@@ -1,3 +1,5 @@
Find a way to copy a file with a progress bar, while still preserving
stat. Easiest way might be to use pv and fix up the permissions etc
after?
+
+[[done]]