summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Joey Hess <joey@kitenet.net> 2011-09-14 13:47:22 -0400
committer Joey Hess <joey@kitenet.net> 2011-09-14 13:47:22 -0400
commit 949b3f69d0f2b2a5c32a00d05d09a0b312fad35a (patch)
tree cc81f8d61652741e365309b86a8020edbce6318e
parent 1ac6217c74b63b9b154d5ee14ed72df8b5aa9268 (diff)
optimize: A new subcommand that either gets or drops file content as needed to work toward meeting the configured numcopies setting.
This is currently rather simplistic, though still useful. In the future, it could become smarter about what content is stored where, etc.
-rw-r--r-- Command.hs 2
-rw-r--r-- Command/Fsck.hs 2
-rw-r--r-- Command/Optimize.hs 35
-rw-r--r-- GitAnnex.hs 2
-rw-r--r-- debian/changelog 2
-rw-r--r-- doc/git-annex.mdwn 5
-rw-r--r-- doc/walkthrough.mdwn 1
-rw-r--r-- doc/walkthrough/optimizing_repositories.mdwn 13
-rw-r--r-- test.hs 12
9 files changed, 73 insertions, 1 deletions
diff --git a/Command.hs b/Command.hs
index 78f9823fb..75c3b4412 100644
--- a/Command.hs
+++ b/Command.hs
@@ -131,6 +131,8 @@ withAttrFilesInGit attr a params = do
repo <- Annex.gitRepo
files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params
liftM (map a) $ liftIO $ Git.checkAttr repo attr files
+withNumCopies :: CommandSeekAttrFiles
+withNumCopies = withAttrFilesInGit "annex.numcopies" -- seek files in git, pairing each with the value of its annex.numcopies attribute
withBackendFilesInGit :: CommandSeekBackendFiles
withBackendFilesInGit a params = do
repo <- Annex.gitRepo
diff --git a/Command/Fsck.hs b/Command/Fsck.hs
index 529a5015a..cdc68581e 100644
--- a/Command/Fsck.hs
+++ b/Command/Fsck.hs
@@ -34,7 +34,7 @@ command = [repoCommand "fsck" (paramOptional $ paramRepeating paramPath) seek
"check for problems"]
seek :: [CommandSeek]
-seek = [withAttrFilesInGit "annex.numcopies" start]
+seek = [withNumCopies start]
start :: CommandStartAttrFile
start (file, attr) = notBareRepo $ isAnnexed file $ \(key, backend) -> do
diff --git a/Command/Optimize.hs b/Command/Optimize.hs
new file mode 100644
index 000000000..40625fc2f
--- /dev/null
+++ b/Command/Optimize.hs
@@ -0,0 +1,35 @@
+{- git-annex command
+ -
+ - Copyright 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Command.Optimize where
+
+import Command
+import Utility
+import LocationLog
+import Trust
+import Config
+import qualified Command.Get
+import qualified Command.Drop
+
+command :: [Command]
+command = [repoCommand "optimize" (paramOptional $ paramRepeating paramPath) seek
+ "get or drop content to best use available space"] -- registers the "optimize" subcommand; its parameter spec is optional, repeating paths
+
+seek :: [CommandSeek]
+seek = [withNumCopies start] -- feeds start the files found by withNumCopies (the same seeker Command.Fsck uses)
+
+start :: CommandStartAttrFile
+start p@(file, attr) = notBareRepo $ isAnnexed file $ \(key, _) -> do -- only the key of the annexed file is used; the second tuple component is ignored
+ needed <- getNumCopies numcopies -- number of copies wanted, given the attribute value parsed below
+ (_, safelocations) <- trustPartition UnTrusted =<< keyLocations key -- keeps the second half of the partition; presumably the locations not marked untrusted (confirm in Trust)
+ dispatch needed (length safelocations)
+ where
+ dispatch needed present
+ | present < needed = Command.Get.start file -- fewer counted copies than needed: hand the file to get
+ | present > needed = Command.Drop.start p -- more counted copies than needed: hand the (file, attr) pair to drop
+ | otherwise = stop -- counts are equal: neither get nor drop is started
+ numcopies = readMaybe attr :: Maybe Int -- attribute value read as an Int; presumably Nothing when it does not parse (confirm in Utility)
diff --git a/GitAnnex.hs b/GitAnnex.hs
index 6f4e5d492..8b9e55750 100644
--- a/GitAnnex.hs
+++ b/GitAnnex.hs
@@ -34,6 +34,7 @@ import qualified Command.Init
import qualified Command.Describe
import qualified Command.InitRemote
import qualified Command.Fsck
+import qualified Command.Optimize
import qualified Command.Unused
import qualified Command.DropUnused
import qualified Command.Unlock
@@ -77,6 +78,7 @@ cmds = concat
, Command.SetKey.command
, Command.Fix.command
, Command.Fsck.command
+ , Command.Optimize.command
, Command.Unused.command
, Command.DropUnused.command
, Command.Find.command
diff --git a/debian/changelog b/debian/changelog
index 9ff745566..b02f6a15b 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -5,6 +5,8 @@ git-annex (3.20110907) UNRELEASED; urgency=low
* Fix build without S3.
* addurl: Always use whole url as destination filename, rather than
only its file component.
+ * optimize: A new subcommand that either gets or drops file content
+ as needed to work toward meeting the configured numcopies setting.
-- Joey Hess <joeyh@debian.org> Tue, 06 Sep 2011 16:59:15 -0400
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 0a484a384..8264c31b3 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -157,6 +157,11 @@ Many git-annex commands will stage changes for later `git commit` by you.
To avoid expensive checksum calculations, specify --fast
+* optimize [path ...]
+
+ Either gets or drops file content, as needed, to work toward meeting the
+ configured numcopies setting.
+
* unused
Checks the annex for data that does not correspond to any files present
diff --git a/doc/walkthrough.mdwn b/doc/walkthrough.mdwn
index eaae6b455..b0eb25815 100644
--- a/doc/walkthrough.mdwn
+++ b/doc/walkthrough.mdwn
@@ -18,5 +18,6 @@ A walkthrough of the basic features of git-annex.
fsck:_verifying_your_data
fsck:_when_things_go_wrong
backups
+ optimizing_repositories
more
"""]]
diff --git a/doc/walkthrough/optimizing_repositories.mdwn b/doc/walkthrough/optimizing_repositories.mdwn
new file mode 100644
index 000000000..0f17f1dea
--- /dev/null
+++ b/doc/walkthrough/optimizing_repositories.mdwn
@@ -0,0 +1,13 @@
+Once you have multiple repositories, and have perhaps configured numcopies,
+any given file can have many more copies than are needed, or perhaps fewer
+than you would like. Fsck can detect the latter problem, but there's another
+command that can help deal with both problems.
+
+The optimize subcommand either gets or drops file content, as needed,
+to work toward meeting the configured numcopies setting.
+
+ # git annex optimize
+ get my_cool_big_file (from laptop...) ok
+ drop other_file ok
+ # git annex optimize --numcopies=2
+ get other_file ok
diff --git a/test.hs b/test.hs
index 4d751a707..bd2e1e46c 100644
--- a/test.hs
+++ b/test.hs
@@ -93,6 +93,7 @@ blackbox = TestLabel "blackbox" $ TestList
, test_unannex
, test_drop
, test_get
+ , test_optimize
, test_move
, test_copy
, test_lock
@@ -216,6 +217,17 @@ test_get = "git-annex get" ~: TestCase $ intmpclonerepo $ do
inmainrepo $ unannexed ingitfile
unannexed ingitfile
+test_optimize :: Test
+test_optimize = "git-annex optimize" ~: TestCase $ intmpclonerepo $ do
+ inmainrepo $ annexed_present annexedfile -- precondition: main repo has the content
+ annexed_notpresent annexedfile -- precondition: the temporary clone does not
+ git_annex "optimize" ["-q", annexedfile, "--numcopies=2"] @? "optimize of file failed" -- first run, with numcopies overridden to 2
+ inmainrepo $ annexed_present annexedfile
+ annexed_present annexedfile -- expected: the clone now holds the content as well
+ git_annex "optimize" ["-q", annexedfile] @? "optimize of file failed" -- second run, without the numcopies override
+ inmainrepo $ annexed_present annexedfile
+ annexed_notpresent annexedfile -- expected: the clone's copy is gone again while the main repo keeps its copy
+
test_move :: Test
test_move = "git-annex move" ~: TestCase $ intmpclonerepo $ do
annexed_notpresent annexedfile