summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2011-01-08 15:54:14 -0400
committerGravatar Joey Hess <joey@kitenet.net>2011-01-08 15:54:14 -0400
commita78b0555e1d46c4548cda3aaa1709040f6fa7f33 (patch)
tree9313997b7a9e3758b27cf79c0319d730397421a4
parent32b0e103909035ad0f25427c57a1ff504aefcada (diff)
New migrate subcommand can be used to switch files to using a different backend, safely and with no duplication of content.
-rw-r--r--Command.hs6
-rw-r--r--Command/Add.hs5
-rw-r--r--Command/Migrate.hs63
-rw-r--r--GitAnnex.hs2
-rw-r--r--TypeInternals.hs3
-rw-r--r--debian/changelog7
-rw-r--r--doc/git-annex.mdwn8
-rw-r--r--doc/walkthrough.mdwn35
8 files changed, 112 insertions, 17 deletions
diff --git a/Command.hs b/Command.hs
index 690dd20ec..b83e640b9 100644
--- a/Command.hs
+++ b/Command.hs
@@ -117,6 +117,12 @@ withAttrFilesInGit attr a params = do
files' <- filterFiles files
pairs <- liftIO $ Git.checkAttr repo attr files'
return $ map a pairs
+withBackendFilesInGit :: CommandSeekBackendFiles -- seek helper: feeds files found via git to `a` through backendPairs
+withBackendFilesInGit a params = do
+ repo <- Annex.gitRepo
+ files <- liftIO $ Git.inRepo repo params -- presumably the files under params that git tracks; confirm in Git.inRepo
+ files' <- filterFiles files -- filterFiles is defined elsewhere; its criteria are not visible here
+ backendPairs a files' -- presumably pairs each file with a backend before applying a; confirm in backendPairs
withFilesMissing :: CommandSeekStrings
withFilesMissing a params = do
files <- liftIO $ filterM missing params
diff --git a/Command/Add.hs b/Command/Add.hs
index bc869a67d..c74b726e3 100644
--- a/Command/Add.hs
+++ b/Command/Add.hs
@@ -42,11 +42,12 @@ perform (file, backend) = do
stored <- Backend.storeFileKey file backend
case stored of
Nothing -> return Nothing
- Just (key, _) -> return $ Just $ cleanup file key
+ Just (key, _) -> do
+ moveAnnex key file
+ return $ Just $ cleanup file key
cleanup :: FilePath -> Key -> CommandCleanup
cleanup file key = do
- moveAnnex key file
logStatus key ValuePresent
link <- calcGitLink file key
diff --git a/Command/Migrate.hs b/Command/Migrate.hs
new file mode 100644
index 000000000..0caded6d1
--- /dev/null
+++ b/Command/Migrate.hs
@@ -0,0 +1,63 @@
+{- git-annex command
+ -
+ - Copyright 2010 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Command.Migrate where
+
+import Control.Monad.State (liftIO)
+import System.Posix.Files
+import System.Directory
+
+import Command
+import qualified Annex
+import qualified Backend
+import Locations
+import Types
+import Core
+import Messages
+import qualified Command.Add
+
+command :: [Command] -- definition of the "migrate" subcommand, added to the cmds list in GitAnnex
+command = [Command "migrate" paramPath seek "switch data to different backend"]
+
+seek :: [CommandSeek] -- files are found with withBackendFilesInGit and handed to start as (file, Maybe backend) pairs
+seek = [withBackendFilesInGit start]
+
+start :: CommandStartBackendFile -- decides whether a given file needs to be migrated
+start (_, Nothing) = return Nothing -- no target backend in the pair: nothing to do
+start (file, Just newbackend) = isAnnexed file $ \(key, oldbackend) -> do -- presumably the lambda only runs for annexed files; confirm in isAnnexed
+ exists <- inAnnex key -- presumably True when the key's content is available locally; confirm in inAnnex
+ if (newbackend /= oldbackend) && exists -- skip files already in newbackend, and files whose content is not available
+ then do
+ showStart "migrate" file
+ return $ Just $ perform file key newbackend
+ else
+ return Nothing
+
+perform :: FilePath -> Key -> Backend -> CommandPerform -- stores file's existing content under newbackend and re-points file at the new key
+perform file oldkey newbackend = do
+ g <- Annex.gitRepo
+
+ -- Store the content cached under the old key in the new backend
+ -- (the file itself can't be stored as usual, because it's already a symlink).
+ -- The old key's content is not dropped here, because there may
+ -- be other files still pointing at that key.
+ let src = annexLocation g oldkey
+ stored <- Backend.storeFileKey src $ Just newbackend
+ case stored of
+ Nothing -> return Nothing -- storeFileKey produced no key: give up without touching file
+ Just (newkey, _) -> do
+ ok <- getViaTmp newkey $ \t -> do -- t is presumably a temporary path supplied by getViaTmp; confirm there
+ -- Make a hard link to the content cached under the old
+ -- key, to avoid wasting disk space on a second copy.
+ liftIO $ createLink src t
+ return True
+ if ok
+ then do
+ -- Remove the old symlink; Command.Add.cleanup then sets up file for the new key.
+ liftIO $ removeFile file
+ return $ Just $ Command.Add.cleanup file newkey
+ else return Nothing
diff --git a/GitAnnex.hs b/GitAnnex.hs
index 24c9ace0a..d9efdad2d 100644
--- a/GitAnnex.hs
+++ b/GitAnnex.hs
@@ -32,6 +32,7 @@ import qualified Command.Unlock
import qualified Command.Lock
import qualified Command.PreCommit
import qualified Command.Find
+import qualified Command.Migrate
import qualified Command.Uninit
import qualified Command.Trust
import qualified Command.Untrust
@@ -59,6 +60,7 @@ cmds = concat
, Command.Unused.command
, Command.DropUnused.command
, Command.Find.command
+ , Command.Migrate.command
]
options :: [Option]
diff --git a/TypeInternals.hs b/TypeInternals.hs
index fe6e562f9..12a9080b3 100644
--- a/TypeInternals.hs
+++ b/TypeInternals.hs
@@ -103,3 +103,6 @@ data Backend = Backend {
instance Show Backend where
show backend = "Backend { name =\"" ++ name backend ++ "\" }"
+
+instance Eq Backend where -- lets backends be compared with == and /=
+ a == b = name a == name b -- equality is by name only; the other fields are not compared
diff --git a/debian/changelog b/debian/changelog
index 7ca74f994..85878113e 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,10 +1,11 @@
git-annex (0.17) UNRELEASED; urgency=low
* unannex: Now skips files whose content is not present, rather than
- it being an error. This allows gradual conversion from one backend
- to another by running unannex followed by add in each repository.
+ it being an error.
+ * New migrate subcommand can be used to switch files to using a different
+ backend, safely and with no duplication of content.
- -- Joey Hess <joeyh@debian.org> Sat, 08 Jan 2011 15:04:48 -0400
+ -- Joey Hess <joeyh@debian.org> Sat, 08 Jan 2011 13:45:06 -0400
git-annex (0.16) unstable; urgency=low
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index e99be4e40..6d106fea4 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -144,6 +144,14 @@ Many git-annex commands will stage changes for later `git commit` by you.
With no parameters, defaults to finding all files in the current directory
and its subdirectories.
+* migrate [path ...]
+
+ Changes the specified annexed files to store their content in the
+ default backend (or the one specified with --backend).
+
+ Note that the content is not removed from the backend it was previously in.
+ Use `git annex unused` to find and remove such content.
+
* unannex [path ...]
Use this to undo an accidental add command. This is not the command you
diff --git a/doc/walkthrough.mdwn b/doc/walkthrough.mdwn
index 47f05ebcf..d2231c81e 100644
--- a/doc/walkthrough.mdwn
+++ b/doc/walkthrough.mdwn
@@ -277,25 +277,32 @@ add something like this to `.gitattributes`:
* annex.backend=SHA1
-## migrating between backends
+## migrating data to a new backend
-Perhaps you had been using the WORM backend, but now have configured
-git-annex to use SHA1 for new files. Your old files are still in WORM. How
-to migrate that content? A quick and dirty way is to use the unannex
-subcommand, which removes a file from git-annex's control, followed by
-a re-add of the file, to put it in the new backend.
+Maybe you started out using the WORM backend, and have now configured
+git-annex to use SHA1. But files you added to the annex before still
+use the WORM backend. There is a simple command that can migrate that
+data:
- # git annex unannex my_cool_big_file
- unannex my_cool_big_file ok
- # git annex add my_cool_big_file
- add my_cool_big_file (checksum ...) ok
+ # git annex migrate my_cool_big_file
+ migrate my_cool_big_file (checksum...) ok
+
+You can only migrate files whose content is currently available. Other
+files will be skipped.
+
+After migrating a file to a new backend, the old content in the old backend
+will still be present. That is necessary because multiple files
+can point to the same content. The `git annex unused` subcommand can be
+used to clear up that detritus later. Note that hard links are used,
+to avoid wasting disk space.
## unused data
It's possible for data to accumulate in the annex that no files point to
-nymore. One way it can happen is if you `git rm` a file without
+anymore. One way it can happen is if you `git rm` a file without
first calling `git annex drop`. And, when you modify an annexed file, the old
-content of the file remains in the annex.
+content of the file remains in the annex. Another way is when migrating
+between backends.
This might be historical data you want to preserve, so git-annex defaults to
preserving it. So from time to time, you may want to check for such data and
@@ -318,6 +325,10 @@ data anymore, you can easily remove it:
# git annex dropunused 1
dropunused 1 ok
+Hint: To drop a lot of unused data, use a command like this:
+
+ # git annex dropunused `seq 1 1000`
+
## fsck: verifying your data
You can use the fsck subcommand to check for problems in your data.