summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-11-28 15:26:27 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-11-28 16:17:55 -0400
commit6869e6023e21698038da7e4a858cbaf6f7b7bbed (patch)
treed6ae8aecbc2b8f65b36f3e0e1dba740d1308bb2e
parentff2d9c828379ce29e5feb6ac770996be04ac072f (diff)
support .git/annex on a different disk than the rest of the repo
The only fully supported thing is to have the main repository on one disk, and .git/annex on another. Only commands that move data in/out of the annex will need to copy it across devices. There is only partial support for putting arbitrary subdirectories of .git/annex on different devices. For one thing, this can require more copies to be done. For example, when .git/annex/tmp is on one device, and .git/annex/journal on another, every journal write involves a call to mv(1). Also, there are a few places that make hard links between various subdirectories of .git/annex with createLink, that are not handled. In the common case without cross-device, the new moveFile is actually faster than renameFile, avoiding an unnecessary stat to check that a file (not a directory) is being moved. Of course if a cross-device move is needed, it is as slow as mv(1) of the data.
-rw-r--r--Annex/Branch.hs2
-rw-r--r--Annex/Content.hs8
-rw-r--r--Command/Add.hs2
-rw-r--r--Command/Unlock.hs2
-rw-r--r--Common.hs1
-rw-r--r--Utility/Directory.hs51
-rw-r--r--debian/changelog2
-rw-r--r--doc/bugs/not_possible_to_have_annex_on_a_separate_filesystem.mdwn13
-rw-r--r--doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk.mdwn11
9 files changed, 81 insertions, 11 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index ccc614555..a92f05b2c 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -312,7 +312,7 @@ setJournalFile file content = do
let jfile = journalFile g file
let tmpfile = gitAnnexTmpDir g </> takeFileName jfile
writeBinaryFile tmpfile content
- renameFile tmpfile jfile
+ moveFile tmpfile jfile
{- Gets any journalled content for a file in the branch. -}
getJournalFile :: FilePath -> Annex (Maybe String)
diff --git a/Annex/Content.hs b/Annex/Content.hs
index 83839ea13..f5571b54a 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -113,7 +113,7 @@ logStatus key status = do
u <- getUUID
logChange key u status
-{- Runs an action, passing it a temporary filename to download,
+{- Runs an action, passing it a temporary filename to get,
- and if the action succeeds, moves the temp file into
- the annex as a key's content. -}
getViaTmp :: Key -> (FilePath -> Annex Bool) -> Annex Bool
@@ -221,7 +221,7 @@ moveAnnex key src = do
else liftIO $ do
createDirectoryIfMissing True dir
allowWrite dir -- in case the directory already exists
- renameFile src dest
+ moveFile src dest
preventWrite dest
preventWrite dir
@@ -243,7 +243,7 @@ fromAnnex :: Key -> FilePath -> Annex ()
fromAnnex key dest = withObjectLoc key $ \(dir, file) -> liftIO $ do
allowWrite dir
allowWrite file
- renameFile file dest
+ moveFile file dest
removeDirectory dir
{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and
@@ -256,7 +256,7 @@ moveBad key = do
liftIO $ do
createDirectoryIfMissing True (parentDir dest)
allowWrite (parentDir src)
- renameFile src dest
+ moveFile src dest
removeDirectory (parentDir src)
logStatus key InfoMissing
return dest
diff --git a/Command/Add.hs b/Command/Add.hs
index ab104b53c..130f5e311 100644
--- a/Command/Add.hs
+++ b/Command/Add.hs
@@ -61,7 +61,7 @@ undo file key e = do
tryharder :: IOException -> Annex ()
tryharder _ = do
src <- fromRepo $ gitAnnexLocation key
- liftIO $ renameFile src file
+ liftIO $ moveFile src file
cleanup :: FilePath -> Key -> Bool -> CommandCleanup
cleanup file key hascontent = do
diff --git a/Command/Unlock.hs b/Command/Unlock.hs
index 22f9ce710..b6f39488d 100644
--- a/Command/Unlock.hs
+++ b/Command/Unlock.hs
@@ -46,7 +46,7 @@ perform dest key = do
then do
liftIO $ do
removeFile dest
- renameFile tmpdest dest
+ moveFile tmpdest dest
allowWrite dest
next $ return True
else error "copy failed!"
diff --git a/Common.hs b/Common.hs
index e0132d9e9..a3802da5f 100644
--- a/Common.hs
+++ b/Common.hs
@@ -23,3 +23,4 @@ import Utility.Misc as X
import Utility.Conditional as X
import Utility.SafeCommand as X
import Utility.Path as X
+import Utility.Directory as X
diff --git a/Utility/Directory.hs b/Utility/Directory.hs
new file mode 100644
index 000000000..7f8822fca
--- /dev/null
+++ b/Utility/Directory.hs
@@ -0,0 +1,51 @@
+{- directory manipulation
+ -
+ - Copyright 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Utility.Directory where
+
+import System.IO.Error
+import System.Posix.Files
+import System.Directory
+import Control.Exception (throw)
+
+import Utility.SafeCommand
+import Utility.Conditional
+import Utility.TempFile
+
+{- Moves one filename to another.
+ - First tries a rename, but falls back to moving across devices if needed.
+ -
+ - If the rename fails with a permission error or a does-not-exist error,
+ - that exception is rethrown unchanged. Any other rename failure is
+ - treated as a possible cross-device move: when dest is an existing
+ - directory the original exception is rethrown; otherwise mv(1) is run
+ - to move src to a temporary file name supplied by viaTmp. If mv fails,
+ - the temporary file is removed (errors from the removal are ignored)
+ - and the original rename exception is rethrown.
+ -
+ - NOTE(review): viaTmp comes from Utility.TempFile and is not visible
+ - here; presumably it renames the temporary file to dest once the
+ - action returns; confirm. The undefined given to viaTmp appears to be
+ - forwarded only to mv, which ignores its second argument; verify
+ - that viaTmp never forces it.
+ -}
+moveFile :: FilePath -> FilePath -> IO ()
+moveFile src dest = try (rename src dest) >>= onrename
+ where
+ onrename (Right _) = return ()
+ onrename (Left e)
+ | isPermissionError e = rethrow
+ | isDoesNotExistError e = rethrow
+ | otherwise = do
+ -- copyFile is likely not as optimised as
+ -- the mv command, so we'll use the latter.
+ -- But, mv will move into a directory if
+ -- dest is one, which is not desired.
+ whenM (isdir dest) rethrow
+ viaTmp mv dest undefined
+ where
+ rethrow = throw e
+ mv tmp _ = do
+ ok <- boolSystem "mv" [Param "-f",
+ Param src, Param tmp]
+ if ok
+ then return ()
+ else do
+ -- delete any partial
+ _ <- try $
+ removeFile tmp
+ rethrow
+ isdir f = do
+ r <- try (getFileStatus f)
+ case r of
+ (Left _) -> return False
+ (Right s) -> return $ isDirectory s
diff --git a/debian/changelog b/debian/changelog
index 943d1e01c..265ba7184 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -6,6 +6,8 @@ git-annex (3.20111123) UNRELEASED; urgency=low
doubled output.
* Avoid needing haskell98 and other fixes for new ghc. Thanks, Mark Wright.
* Bugfix: dropunused did not drop keys with two spaces in their name.
+ * Support for storing .git/annex on a different device than the rest of the
+ git repository.
-- Joey Hess <joeyh@debian.org> Tue, 22 Nov 2011 17:53:42 -0400
diff --git a/doc/bugs/not_possible_to_have_annex_on_a_separate_filesystem.mdwn b/doc/bugs/not_possible_to_have_annex_on_a_separate_filesystem.mdwn
index e9a3ee95a..7daf03284 100644
--- a/doc/bugs/not_possible_to_have_annex_on_a_separate_filesystem.mdwn
+++ b/doc/bugs/not_possible_to_have_annex_on_a_separate_filesystem.mdwn
@@ -14,12 +14,19 @@ But when trying to add files i get:
I have tried both using bind-mount and with a sym-link.
-> I don't think this was a reversion; the forum post doesn't really
-> indicate it ever worked.
->
> Grepping for `renameFile` and `createLink` will find all the places
> in git-annex that assume one filesystem. These would have to be changed
> to catch errors and fall back to expensive copying.
>
> Putting a separate repository on the file server could work better
> depending on what you're trying to do. --[[Joey]]
+
+>> I've added support for putting `.git/annex` on a separate filesystem
+>> from the rest of the git repository.
+>>
+>> Putting individual subdirectories like `.git/annex/objects` on separate
+>> filesystems from other subdirectories is not fully supported; it may
+>> work but it may be slow and a few things (like `git annex migrate`) are
+>> known to fail due to using hard links. I don't think this is worth
+>> supporting. [[done]]
+>> --[[Joey]]
diff --git a/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk.mdwn b/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk.mdwn
index a04c8b040..f70c12702 100644
--- a/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk.mdwn
+++ b/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk.mdwn
@@ -1,3 +1,12 @@
This works with bind-mount, I might try with softlinks as well.
-Going through git's data on push/pull can take ages on a spindle disk even if the repo is rather small in size. This is especially true if you are used to ssd speeds, but ssd storage is expensive. Storing the annex objects on a cheap spindle disk and everything else on a ssd makes things a _lot_ faster.
+Going through git's data on push/pull can take ages on a spindle disk even
+if the repo is rather small in size. This is especially true if you are
+used to ssd speeds, but ssd storage is expensive. Storing the annex objects
+on a cheap spindle disk and everything else on a ssd makes things a _lot_
+faster.
+
+> Update: git-annex supports `.git/annex/` being moved to a different disk
+> than the rest of the repository, but does *not* support individual
+> subdirectories, like `.git/annex/objects/` being on a different disk
+> than the main `.git/annex/` directory. --[[Joey]]