diff options
author | Joey Hess <joey@kitenet.net> | 2011-04-02 20:59:41 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2011-04-02 21:35:02 -0400 |
commit | 868300d4c1dafd2c4b91ad3f369cfb48f14bb82a (patch) | |
tree | 6108bc12c88d54be0763dc6e5cdc382461a9667c | |
parent | 09a16176dea5ef2a51e3a3d00d77180966c597d9 (diff) |
unused/dropunused: support --from
-rw-r--r-- | Command/DropUnused.hs | 31 | ||||
-rw-r--r-- | Command/Unused.hs | 116 | ||||
-rw-r--r-- | debian/changelog | 3 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 16 | ||||
-rw-r--r-- | doc/special_remotes.mdwn | 23 | ||||
-rw-r--r-- | doc/walkthrough/unused_data.mdwn | 2 |
6 files changed, 147 insertions, 44 deletions
diff --git a/Command/DropUnused.hs b/Command/DropUnused.hs index 932a8b863..1eec68820 100644 --- a/Command/DropUnused.hs +++ b/Command/DropUnused.hs @@ -19,6 +19,8 @@ import Messages import Locations import qualified Annex import qualified Command.Drop +import qualified Command.Move +import qualified Remote import Backend import Key @@ -40,15 +42,28 @@ start m s = notBareRepo $ do case M.lookup s m of Nothing -> return Nothing Just key -> do - g <- Annex.gitRepo showStart "dropunused" s - backend <- keyBackend key - -- drop both content in the backend and any tmp - -- file for the key - let tmp = gitAnnexTmpLocation g key - tmp_exists <- liftIO $ doesFileExist tmp - when tmp_exists $ liftIO $ removeFile tmp - return $ Just $ Command.Drop.perform key backend (Just 0) + from <- Annex.getState Annex.fromremote + case from of + Just name -> do + r <- Remote.byName name + return $ Just $ performRemote r key + _ -> return $ Just $ perform key + +{- drop both content in the backend and any tmp file for the key -} +perform :: Key -> CommandPerform +perform key = do + g <- Annex.gitRepo + let tmp = gitAnnexTmpLocation g key + tmp_exists <- liftIO $ doesFileExist tmp + when tmp_exists $ liftIO $ removeFile tmp + backend <- keyBackend key + Command.Drop.perform key backend (Just 0) -- force drop + +performRemote :: Remote.Remote Annex -> Key -> CommandPerform +performRemote r key = do + showNote $ "from " ++ Remote.name r ++ "..." + return $ Just $ Command.Move.fromCleanup r True key readUnusedLog :: Annex (M.Map String Key) readUnusedLog = do diff --git a/Command/Unused.hs b/Command/Unused.hs index 83d8757cf..a3fb6fe23 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -20,9 +20,11 @@ import Content import Messages import Locations import Utility +import LocationLog import qualified Annex import qualified GitRepo as Git import qualified Backend +import qualified Remote command :: [Command] command = [repoCommand "unused" paramNothing seek @@ -39,35 +41,54 @@ start = notBareRepo $ do perform :: CommandPerform perform = do - _ <- checkUnused + from <- Annex.getState Annex.fromremote + case from of + Just name -> do + r <- Remote.byName name + checkRemoteUnused r + _ -> checkUnused return $ Just $ return True -checkUnused :: Annex Bool +checkUnused :: Annex () checkUnused = do (unused, staletmp) <- unusedKeys let unusedlist = number 0 unused let staletmplist = number (length unused) staletmp let list = unusedlist ++ staletmplist - g <- Annex.gitRepo - liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ unlines $ - map (\(n, k) -> show n ++ " " ++ show k) list - unless (null unused) $ showLongNote $ unusedmsg unusedlist - unless (null staletmp) $ showLongNote $ staletmpmsg staletmplist + writeUnusedFile list + unless (null unused) $ showLongNote $ unusedMsg unusedlist + unless (null staletmp) $ showLongNote $ staleTmpMsg staletmplist unless (null list) $ showLongNote $ "\n" - return $ null list +checkRemoteUnused :: Remote.Remote Annex -> Annex () +checkRemoteUnused r = do + g <- Annex.gitRepo + showNote $ "checking for unused data on " ++ Remote.name r ++ "..." + referenced <- getKeysReferenced + logged <- liftIO $ loggedKeys g + remotehas <- filterM isthere logged + let remoteunused = remotehas `exclude` referenced + let list = number 0 remoteunused + writeUnusedFile list + unless (null remoteunused) $ do + showLongNote $ remoteUnusedMsg r list + showLongNote $ "\n" + where + isthere k = do + g <- Annex.gitRepo + us <- liftIO $ keyLocations g k + return $ uuid `elem` us + uuid = Remote.uuid r + +writeUnusedFile :: [(Int, Key)] -> Annex () +writeUnusedFile l = do + g <- Annex.gitRepo + liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ + unlines $ map (\(n, k) -> show n ++ " " ++ show k) l + +table :: [(Int, Key)] -> [String] +table l = [" NUMBER KEY"] ++ map cols l where - unusedmsg u = unlines $ - ["Some annexed data is no longer pointed to by any files in the repository:"] - ++ table u ++ - ["(To see where data was previously used, try: git log --stat -S'KEY')"] ++ - dropmsg - staletmpmsg t = unlines $ - ["Some partially transferred data exists in temporary files:"] - ++ table t ++ dropmsg - dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"] - - table l = [" NUMBER KEY"] ++ map cols l cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k pad n s = s ++ replicate (n - length s) ' ' @@ -75,6 +96,39 @@ number :: Int -> [a] -> [(Int, a)] number _ [] = [] number n (x:xs) = (n+1, x):(number (n+1) xs) +staleTmpMsg :: [(Int, Key)] -> String +staleTmpMsg t = unlines $ + ["Some partially transferred data exists in temporary files:"] + ++ table t ++ [dropMsg Nothing] + +unusedMsg :: [(Int, Key)] -> String +unusedMsg u = unusedMsg' u + ["Some annexed data is no longer used by any files in the repository:"] + [dropMsg Nothing] + +remoteUnusedMsg :: Remote.Remote Annex -> [(Int, Key)] -> String +remoteUnusedMsg r u = unusedMsg' u + ["Some annexed data on " ++ name ++ + " is not used by any files in this repository."] + [dropMsg $ Just r, + "Please be cautious -- are you sure that the remote repository", + "does not use this data?"] + where + name = Remote.name r + +unusedMsg' :: [(Int, Key)] -> [String] -> [String] -> String +unusedMsg' u header trailer = unlines $ + header ++ + table u ++ + ["(To see where data was previously used, try: git log --stat -S'KEY')"] ++ + trailer + +dropMsg :: Maybe (Remote.Remote Annex) -> String +dropMsg Nothing = dropMsg' "" +dropMsg (Just r) = dropMsg' $ " --from " ++ Remote.name r +dropMsg' :: String -> String +dropMsg' s = "(To remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER)" + {- Finds keys whose content is present, but that do not seem to be used - by any files in the git repo, or that are only present as tmp files. -} unusedKeys :: Annex ([Key], [Key]) @@ -93,7 +147,9 @@ unusedKeys = do referenced <- getKeysReferenced tmps <- tmpKeys - let (unused, staletmp, duptmp) = calcUnusedKeys present referenced tmps + let unused = present `exclude` referenced + let staletmp = tmps `exclude` present + let duptmp = tmps `exclude` staletmp -- Tmp files that are dups of content already present -- can simply be removed. @@ -102,18 +158,16 @@ unusedKeys = do return (unused, staletmp) -calcUnusedKeys :: [Key] -> [Key] -> [Key] -> ([Key], [Key], [Key]) -calcUnusedKeys present referenced tmps = (unused, staletmp, duptmp) +{- Finds items in the first, smaller list, that are not + - present in the second, larger list. + - + - Constructing a single set, of the list that tends to be + - smaller, appears more efficient in both memory and CPU + - than constructing and taking the S.difference of two sets. -} +exclude :: Ord a => [a] -> [a] -> [a] +exclude [] _ = [] -- optimisation +exclude smaller larger = S.toList $ remove larger $ S.fromList smaller where - unused = present `exclude` referenced - staletmp = tmps `exclude` present - duptmp = tmps `exclude` staletmp - - -- Constructing a single set, of the list that tends to be - -- smaller, appears more efficient in both memory and CPU - -- than constructing and taking the S.difference of two sets. - exclude [] _ = [] -- optimisation - exclude smaller larger = S.toList $ remove larger $ S.fromList smaller remove a b = foldl (flip S.delete) b a {- List of keys referenced by symlinks in the git repo. -} diff --git a/debian/changelog b/debian/changelog index 29f60063e..e504bd8f6 100644 --- a/debian/changelog +++ b/debian/changelog @@ -4,6 +4,9 @@ git-annex (0.20110402) UNRELEASED; urgency=low some issues with git on OSX with the mixed-case directories. No migration is needed; the old mixed case hash directories are still read; new information is written to the new directories. + * Unused files on remotes, particulary special remotes, can now be + identified and dropped, by using "--from remote" with git annex unused + and git annex dropunused. -- Joey Hess <joeyh@debian.org> Sat, 02 Apr 2011 13:45:54 -0400 diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index d890b518b..7d0fb3e79 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -155,16 +155,21 @@ Many git-annex commands will stage changes for later `git commit` by you. * unused - Checks the annex for data that is not used by any files currently - in the annex, and prints a numbered list of the data. + Checks the annex for data that does not correspond to any files currently + in the respository, and prints a numbered list of the data. To only show unused temp files, specify --fast + To check data on a remote that does not correspond to any files currently + in the local repository, specify --from. + * dropunused [number ...] Drops the data corresponding to the numbers, as listed by the last `git annex unused` + To drop the data from a remote, specify --from. + * find [path ...] Outputs a list of annexed files whose content is currently present. @@ -317,12 +322,15 @@ Many git-annex commands will stage changes for later `git commit` by you. * --from=repository - Specifies a repository that content will be retrieved from. + Specifies a repository that content will be retrieved from, or that + should otherwise be acted on. + It should be specified using the name of a configured remote. * --to=repository - Specifies a repository that content will be sent to. + Specifies a repository that content will be sent to. + It should be specified using the name of a configured remote. * --exclude=glob diff --git a/doc/special_remotes.mdwn b/doc/special_remotes.mdwn index 09b751d0f..f4d479aa9 100644 --- a/doc/special_remotes.mdwn +++ b/doc/special_remotes.mdwn @@ -8,3 +8,26 @@ They cannot be used by other git commands though. * [[Amazon_S3]] * [[directory]] + +## Unused content on special remotes + +Over time, special remotes can accumulate file content that is no longer +referred to by files in git. Normally, unused content in the current +repository is found by running `git annex unused`. To detect unused content +on special remotes, instead use `git annex unused --from`. Example: + + $ git annex unused --from mys3 + unused (checking for unused data on mys3...) + Some annexed data on mys3 is not used by any files in this repository. + NUMBER KEY + 1 WORM-s3-m1301674316--foo + (To see where data was previously used, try: git log --stat -S'KEY') + (To remove unwanted data: git-annex dropunused --from mys3 NUMBER) + Please be cautious -- are you sure that the remote repository + does not use this data? + $ git annex dropunused --from mys3 1 + dropunused 12948 (from mys3...) ok + +Do be cautious when using this; it cannot detect if content in a remote +is used by that remote, or is the last copy of data that is used by +some *other* remote. diff --git a/doc/walkthrough/unused_data.mdwn b/doc/walkthrough/unused_data.mdwn index 9be32577c..2f8edcd38 100644 --- a/doc/walkthrough/unused_data.mdwn +++ b/doc/walkthrough/unused_data.mdwn @@ -10,7 +10,7 @@ eliminate it to save space. # git annex unused unused (checking for unused data...) - Some annexed data is no longer pointed to by any files in the repository. + Some annexed data is no longer used by any files in the repository. NUMBER KEY 1 WORM-s3-m1289672605--file 2 WORM-s14-m1289672605--file |