summary refs log tree commit diff
diff options
context:
space:
mode:
author Joey Hess <joey@kitenet.net> 2011-04-02 20:59:41 -0400
committer Joey Hess <joey@kitenet.net> 2011-04-02 21:35:02 -0400
commit 868300d4c1dafd2c4b91ad3f369cfb48f14bb82a (patch)
tree 6108bc12c88d54be0763dc6e5cdc382461a9667c
parent 09a16176dea5ef2a51e3a3d00d77180966c597d9 (diff)
unused/dropunused: support --from
-rw-r--r-- Command/DropUnused.hs 31
-rw-r--r-- Command/Unused.hs 116
-rw-r--r-- debian/changelog 3
-rw-r--r-- doc/git-annex.mdwn 16
-rw-r--r-- doc/special_remotes.mdwn 23
-rw-r--r-- doc/walkthrough/unused_data.mdwn 2
6 files changed, 147 insertions, 44 deletions
diff --git a/Command/DropUnused.hs b/Command/DropUnused.hs
index 932a8b863..1eec68820 100644
--- a/Command/DropUnused.hs
+++ b/Command/DropUnused.hs
@@ -19,6 +19,8 @@ import Messages
import Locations
import qualified Annex
import qualified Command.Drop
+import qualified Command.Move
+import qualified Remote
import Backend
import Key
@@ -40,15 +42,28 @@ start m s = notBareRepo $ do
case M.lookup s m of
Nothing -> return Nothing
Just key -> do
- g <- Annex.gitRepo
showStart "dropunused" s
- backend <- keyBackend key
- -- drop both content in the backend and any tmp
- -- file for the key
- let tmp = gitAnnexTmpLocation g key
- tmp_exists <- liftIO $ doesFileExist tmp
- when tmp_exists $ liftIO $ removeFile tmp
- return $ Just $ Command.Drop.perform key backend (Just 0)
+ from <- Annex.getState Annex.fromremote
+ case from of
+ Just name -> do
+ r <- Remote.byName name
+ return $ Just $ performRemote r key
+ _ -> return $ Just $ perform key
+
+{- drop both content in the backend and any tmp file for the key -}
+perform :: Key -> CommandPerform
+perform key = do
+ g <- Annex.gitRepo
+ let tmp = gitAnnexTmpLocation g key
+ tmp_exists <- liftIO $ doesFileExist tmp
+ when tmp_exists $ liftIO $ removeFile tmp
+ backend <- keyBackend key
+ Command.Drop.perform key backend (Just 0) -- force drop
+
+performRemote :: Remote.Remote Annex -> Key -> CommandPerform
+performRemote r key = do
+ showNote $ "from " ++ Remote.name r ++ "..."
+ return $ Just $ Command.Move.fromCleanup r True key
readUnusedLog :: Annex (M.Map String Key)
readUnusedLog = do
diff --git a/Command/Unused.hs b/Command/Unused.hs
index 83d8757cf..a3fb6fe23 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -20,9 +20,11 @@ import Content
import Messages
import Locations
import Utility
+import LocationLog
import qualified Annex
import qualified GitRepo as Git
import qualified Backend
+import qualified Remote
command :: [Command]
command = [repoCommand "unused" paramNothing seek
@@ -39,35 +41,54 @@ start = notBareRepo $ do
perform :: CommandPerform
perform = do
- _ <- checkUnused
+ from <- Annex.getState Annex.fromremote
+ case from of
+ Just name -> do
+ r <- Remote.byName name
+ checkRemoteUnused r
+ _ -> checkUnused
return $ Just $ return True
-checkUnused :: Annex Bool
+checkUnused :: Annex ()
checkUnused = do
(unused, staletmp) <- unusedKeys
let unusedlist = number 0 unused
let staletmplist = number (length unused) staletmp
let list = unusedlist ++ staletmplist
- g <- Annex.gitRepo
- liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ unlines $
- map (\(n, k) -> show n ++ " " ++ show k) list
- unless (null unused) $ showLongNote $ unusedmsg unusedlist
- unless (null staletmp) $ showLongNote $ staletmpmsg staletmplist
+ writeUnusedFile list
+ unless (null unused) $ showLongNote $ unusedMsg unusedlist
+ unless (null staletmp) $ showLongNote $ staleTmpMsg staletmplist
unless (null list) $ showLongNote $ "\n"
- return $ null list
+checkRemoteUnused :: Remote.Remote Annex -> Annex ()
+checkRemoteUnused r = do
+ g <- Annex.gitRepo
+ showNote $ "checking for unused data on " ++ Remote.name r ++ "..."
+ referenced <- getKeysReferenced
+ logged <- liftIO $ loggedKeys g
+ remotehas <- filterM isthere logged
+ let remoteunused = remotehas `exclude` referenced
+ let list = number 0 remoteunused
+ writeUnusedFile list
+ unless (null remoteunused) $ do
+ showLongNote $ remoteUnusedMsg r list
+ showLongNote $ "\n"
+ where
+ isthere k = do
+ g <- Annex.gitRepo
+ us <- liftIO $ keyLocations g k
+ return $ uuid `elem` us
+ uuid = Remote.uuid r
+
+writeUnusedFile :: [(Int, Key)] -> Annex ()
+writeUnusedFile l = do
+ g <- Annex.gitRepo
+ liftIO $ safeWriteFile (gitAnnexUnusedLog g) $
+ unlines $ map (\(n, k) -> show n ++ " " ++ show k) l
+
+table :: [(Int, Key)] -> [String]
+table l = [" NUMBER KEY"] ++ map cols l
where
- unusedmsg u = unlines $
- ["Some annexed data is no longer pointed to by any files in the repository:"]
- ++ table u ++
- ["(To see where data was previously used, try: git log --stat -S'KEY')"] ++
- dropmsg
- staletmpmsg t = unlines $
- ["Some partially transferred data exists in temporary files:"]
- ++ table t ++ dropmsg
- dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"]
-
- table l = [" NUMBER KEY"] ++ map cols l
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
pad n s = s ++ replicate (n - length s) ' '
@@ -75,6 +96,39 @@ number :: Int -> [a] -> [(Int, a)]
number _ [] = []
number n (x:xs) = (n+1, x):(number (n+1) xs)
+staleTmpMsg :: [(Int, Key)] -> String
+staleTmpMsg t = unlines $
+ ["Some partially transferred data exists in temporary files:"]
+ ++ table t ++ [dropMsg Nothing]
+
+unusedMsg :: [(Int, Key)] -> String
+unusedMsg u = unusedMsg' u
+ ["Some annexed data is no longer used by any files in the repository:"]
+ [dropMsg Nothing]
+
+remoteUnusedMsg :: Remote.Remote Annex -> [(Int, Key)] -> String
+remoteUnusedMsg r u = unusedMsg' u
+ ["Some annexed data on " ++ name ++
+ " is not used by any files in this repository."]
+ [dropMsg $ Just r,
+ "Please be cautious -- are you sure that the remote repository",
+ "does not use this data?"]
+ where
+ name = Remote.name r
+
+unusedMsg' :: [(Int, Key)] -> [String] -> [String] -> String
+unusedMsg' u header trailer = unlines $
+ header ++
+ table u ++
+ ["(To see where data was previously used, try: git log --stat -S'KEY')"] ++
+ trailer
+
+dropMsg :: Maybe (Remote.Remote Annex) -> String
+dropMsg Nothing = dropMsg' ""
+dropMsg (Just r) = dropMsg' $ " --from " ++ Remote.name r
+dropMsg' :: String -> String
+dropMsg' s = "(To remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER)"
+
{- Finds keys whose content is present, but that do not seem to be used
- by any files in the git repo, or that are only present as tmp files. -}
unusedKeys :: Annex ([Key], [Key])
@@ -93,7 +147,9 @@ unusedKeys = do
referenced <- getKeysReferenced
tmps <- tmpKeys
- let (unused, staletmp, duptmp) = calcUnusedKeys present referenced tmps
+ let unused = present `exclude` referenced
+ let staletmp = tmps `exclude` present
+ let duptmp = tmps `exclude` staletmp
-- Tmp files that are dups of content already present
-- can simply be removed.
@@ -102,18 +158,16 @@ unusedKeys = do
return (unused, staletmp)
-calcUnusedKeys :: [Key] -> [Key] -> [Key] -> ([Key], [Key], [Key])
-calcUnusedKeys present referenced tmps = (unused, staletmp, duptmp)
+{- Finds items in the first, smaller list, that are not
+ - present in the second, larger list.
+ -
+ - Constructing a single set, of the list that tends to be
+ - smaller, appears more efficient in both memory and CPU
+ - than constructing and taking the S.difference of two sets. -}
+exclude :: Ord a => [a] -> [a] -> [a]
+exclude [] _ = [] -- optimisation
+exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
where
- unused = present `exclude` referenced
- staletmp = tmps `exclude` present
- duptmp = tmps `exclude` staletmp
-
- -- Constructing a single set, of the list that tends to be
- -- smaller, appears more efficient in both memory and CPU
- -- than constructing and taking the S.difference of two sets.
- exclude [] _ = [] -- optimisation
- exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
remove a b = foldl (flip S.delete) b a
{- List of keys referenced by symlinks in the git repo. -}
diff --git a/debian/changelog b/debian/changelog
index 29f60063e..e504bd8f6 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -4,6 +4,9 @@ git-annex (0.20110402) UNRELEASED; urgency=low
some issues with git on OSX with the mixed-case directories.
No migration is needed; the old mixed case hash directories are still
read; new information is written to the new directories.
+ * Unused files on remotes, particularly special remotes, can now be
+ identified and dropped, by using "--from remote" with git annex unused
+ and git annex dropunused.
-- Joey Hess <joeyh@debian.org> Sat, 02 Apr 2011 13:45:54 -0400
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index d890b518b..7d0fb3e79 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -155,16 +155,21 @@ Many git-annex commands will stage changes for later `git commit` by you.
* unused
- Checks the annex for data that is not used by any files currently
- in the annex, and prints a numbered list of the data.
+ Checks the annex for data that does not correspond to any files currently
+ in the repository, and prints a numbered list of the data.
To only show unused temp files, specify --fast
+ To check data on a remote that does not correspond to any files currently
+ in the local repository, specify --from.
+
* dropunused [number ...]
Drops the data corresponding to the numbers, as listed by the last
`git annex unused`
+ To drop the data from a remote, specify --from.
+
* find [path ...]
Outputs a list of annexed files whose content is currently present.
@@ -317,12 +322,15 @@ Many git-annex commands will stage changes for later `git commit` by you.
* --from=repository
- Specifies a repository that content will be retrieved from.
+ Specifies a repository that content will be retrieved from, or that
+ should otherwise be acted on.
+
It should be specified using the name of a configured remote.
* --to=repository
- Specifies a repository that content will be sent to.
+ Specifies a repository that content will be sent to.
+
It should be specified using the name of a configured remote.
* --exclude=glob
diff --git a/doc/special_remotes.mdwn b/doc/special_remotes.mdwn
index 09b751d0f..f4d479aa9 100644
--- a/doc/special_remotes.mdwn
+++ b/doc/special_remotes.mdwn
@@ -8,3 +8,26 @@ They cannot be used by other git commands though.
* [[Amazon_S3]]
* [[directory]]
+
+## Unused content on special remotes
+
+Over time, special remotes can accumulate file content that is no longer
+referred to by files in git. Normally, unused content in the current
+repository is found by running `git annex unused`. To detect unused content
+on special remotes, instead use `git annex unused --from`. Example:
+
+ $ git annex unused --from mys3
+ unused (checking for unused data on mys3...)
+ Some annexed data on mys3 is not used by any files in this repository.
+ NUMBER KEY
+ 1 WORM-s3-m1301674316--foo
+ (To see where data was previously used, try: git log --stat -S'KEY')
+ (To remove unwanted data: git-annex dropunused --from mys3 NUMBER)
+ Please be cautious -- are you sure that the remote repository
+ does not use this data?
+ $ git annex dropunused --from mys3 1
+ dropunused 1 (from mys3...) ok
+
+Do be cautious when using this; it cannot detect if content in a remote
+is used by that remote, or is the last copy of data that is used by
+some *other* remote.
diff --git a/doc/walkthrough/unused_data.mdwn b/doc/walkthrough/unused_data.mdwn
index 9be32577c..2f8edcd38 100644
--- a/doc/walkthrough/unused_data.mdwn
+++ b/doc/walkthrough/unused_data.mdwn
@@ -10,7 +10,7 @@ eliminate it to save space.
# git annex unused
unused (checking for unused data...)
- Some annexed data is no longer pointed to by any files in the repository.
+ Some annexed data is no longer used by any files in the repository.
NUMBER KEY
1 WORM-s3-m1289672605--file
2 WORM-s14-m1289672605--file