summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2013-08-31 17:38:33 -0400
committerGravatar Joey Hess <joey@kitenet.net>2013-08-31 17:51:13 -0400
commit2afb4de6c02a4515f8b1bf6e24a32cbde7eae5a8 (patch)
tree22f667488fe21140622dd235fed81d03c1b747de
parent2b83639fac92307deeaa7b1bc75a0c71f35e5b1e (diff)
forget --drop-dead: Completely removes mentions of repositories that have been marked as dead from the git-annex branch.
Wrote nice pure transition calculator, and ugly code to stage its results into the git-annex branch. Also had to split up several Log modules that Annex.Branch needed to use, but that themselves used Annex.Branch. The transition calculator is limited to looking at and changing one file at a time. While this made the implementation relatively easy, it precludes transitions that do stuff like deleting old url log files for keys that are being removed because they are no longer present anywhere.
-rw-r--r--Annex/Branch.hs91
-rw-r--r--Annex/Branch/Transitions.hs53
-rw-r--r--Command/Forget.hs31
-rw-r--r--Logs/Presence.hs83
-rw-r--r--Logs/Presence/Pure.hs84
-rw-r--r--Logs/Transitions.hs13
-rw-r--r--Logs/Trust.hs28
-rw-r--r--Logs/Trust/Pure.hs36
-rw-r--r--debian/changelog2
-rw-r--r--doc/git-annex.mdwn13
10 files changed, 279 insertions, 155 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index 334b60634..9ee281de9 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -46,8 +46,12 @@ import Annex.CatFile
import Annex.Perms
import qualified Annex
import Utility.Env
+import Logs
import Logs.Transitions
+import Logs.Trust.Pure
import Annex.ReplaceFile
+import qualified Annex.Queue
+import Annex.Branch.Transitions
{- Name of the branch that is used to store git-annex's information. -}
name :: Git.Ref
@@ -194,7 +198,10 @@ get' :: FilePath -> Annex String
get' file = go =<< getJournalFile file
where
go (Just journalcontent) = return journalcontent
- go Nothing = withIndex $ L.unpack <$> catFile fullname file
+ go Nothing = getRaw file
+
+getRaw :: FilePath -> Annex String
+getRaw file = withIndex $ L.unpack <$> catFile fullname file
{- Applies a function to modifiy the content of a file.
-
@@ -272,13 +279,17 @@ commitBranch' branchref message parents = do
files :: Annex [FilePath]
files = do
update
- withIndex $ do
- bfiles <- inRepo $ Git.Command.pipeNullSplitZombie
- [ Params "ls-tree --name-only -r -z"
- , Param $ show fullname
- ]
- jfiles <- getJournalledFiles
- return $ jfiles ++ bfiles
+ (++)
+ <$> branchFiles
+ <*> getJournalledFiles
+
+{- Files in the branch, not including any from journalled changes,
+ - and without updating the branch. -}
+branchFiles :: Annex [FilePath]
+branchFiles = withIndex $ inRepo $ Git.Command.pipeNullSplitZombie
+ [ Params "ls-tree --name-only -r -z"
+ , Param $ show fullname
+ ]
{- Populates the branch's index file with the current branch contents.
-
@@ -436,20 +447,60 @@ getIgnoredRefs = S.fromList . mapMaybe Git.Sha.extractSha . lines <$> content
- commits it to the branch, or creates a new branch, and returns
- the branch's ref. -}
performTransitions :: Transitions -> Bool -> Annex Git.Ref
-performTransitions ts neednewbranch = withIndex $ do
- when (inTransitions ForgetDeadRemotes ts) $
- error "TODO ForgetDeadRemotes transition"
- if neednewbranch
- then do
- committedref <- inRepo $ Git.Branch.commit message fullname []
- setIndexSha committedref
- return committedref
- else do
- ref <- getBranch
- commitBranch ref message [fullname]
- getBranch
+performTransitions ts neednewbranch = do
+ -- For simplicity & speed, we're going to use the Annex.Queue to
+ -- update the git-annex branch, while it usually holds changes
+ -- for the head branch. Flush any such changes.
+ Annex.Queue.flush
+ withIndex $ do
+ run $ mapMaybe getTransitionCalculator $ transitionList ts
+ Annex.Queue.flush
+ if neednewbranch
+ then do
+ committedref <- inRepo $ Git.Branch.commit message fullname []
+ setIndexSha committedref
+ return committedref
+ else do
+ ref <- getBranch
+ commitBranch ref message [fullname]
+ getBranch
where
message
| neednewbranch = "new branch for transition " ++ tdesc
| otherwise = "continuing transition " ++ tdesc
tdesc = show $ map describeTransition $ transitionList ts
+
+ {- The changes to make to the branch are calculated and applied to
+ - the branch directly, rather than going through the journal,
+ - which would be innefficient. (And the journal is not designed
+ - to hold changes to every file in the branch at once.)
+ -
+ - When a file in the branch is changed by transition code,
+ - that value is remembered and fed into the code for subsequent
+ - transitions.
+ -}
+ run [] = noop
+ run changers = do
+ trustmap <- calcTrustMap <$> getRaw trustLog
+ fs <- branchFiles
+ hasher <- inRepo hashObjectStart
+ forM_ fs $ \f -> do
+ content <- getRaw f
+ apply changers hasher f content trustmap
+ liftIO $ hashObjectStop hasher
+ apply [] _ _ _ _ = return ()
+ apply (changer:rest) hasher file content trustmap =
+ case changer file content trustmap of
+ RemoveFile -> do
+ Annex.Queue.addUpdateIndex
+ =<< inRepo (Git.UpdateIndex.unstageFile file)
+ -- File is deleted; can't run any other
+ -- transitions on it.
+ return ()
+ ChangeFile content' -> do
+ sha <- inRepo $ hashObject BlobObject content'
+ Annex.Queue.addUpdateIndex $ Git.UpdateIndex.pureStreamer $
+ Git.UpdateIndex.updateIndexLine sha FileBlob (asTopFilePath file)
+ apply rest hasher file content' trustmap
+ PreserveFile ->
+ apply rest hasher file content trustmap
diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs
new file mode 100644
index 000000000..90002de62
--- /dev/null
+++ b/Annex/Branch/Transitions.hs
@@ -0,0 +1,53 @@
+{- git-annex branch transitions
+ -
+ - Copyright 2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.Branch.Transitions (
+ FileTransition(..),
+ getTransitionCalculator
+) where
+
+import Logs
+import Logs.Transitions
+import Logs.UUIDBased as UUIDBased
+import Logs.Presence.Pure as Presence
+import Types.TrustLevel
+import Types.UUID
+
+import qualified Data.Map as M
+
+data FileTransition
+ = ChangeFile String
+ | RemoveFile
+ | PreserveFile
+
+type TransitionCalculator = FilePath -> String -> TrustMap -> FileTransition
+
+getTransitionCalculator :: Transition -> Maybe TransitionCalculator
+getTransitionCalculator ForgetGitHistory = Nothing
+getTransitionCalculator ForgetDeadRemotes = Just dropDead
+
+dropDead :: FilePath -> String -> TrustMap -> FileTransition
+dropDead f content trustmap = case getLogVariety f of
+ Just UUIDBasedLog -> ChangeFile $
+ UUIDBased.showLog id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLog Just content
+ Just (PresenceLog _) ->
+ let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content
+ in if null newlog
+ then RemoveFile
+ else ChangeFile $ Presence.showLog newlog
+ Nothing -> PreserveFile
+
+dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String
+dropDeadFromUUIDBasedLog trustmap = M.filterWithKey $ notDead trustmap . const
+
+{- Presence logs can contain UUIDs or other values. Any line that matches
+ - a dead uuid is dropped; any other values are passed through. -}
+dropDeadFromPresenceLog :: TrustMap -> [Presence.LogLine] -> [Presence.LogLine]
+dropDeadFromPresenceLog trustmap = filter $ notDead trustmap (toUUID . Presence.info)
+
+notDead :: TrustMap -> (v -> UUID) -> v -> Bool
+notDead trustmap a v = M.findWithDefault SemiTrusted (a v) trustmap /= DeadTrusted
diff --git a/Command/Forget.hs b/Command/Forget.hs
index e405a9918..d216ae3ca 100644
--- a/Command/Forget.hs
+++ b/Command/Forget.hs
@@ -12,30 +12,41 @@ import Command
import qualified Annex.Branch as Branch
import Logs.Transitions
import qualified Annex
+import qualified Option
import Data.Time.Clock.POSIX
def :: [Command]
-def = [command "forget" paramNothing seek
+def = [withOptions forgetOptions $ command "forget" paramNothing seek
SectionMaintenance "prune git-annex branch history"]
+forgetOptions :: [Option]
+forgetOptions = [dropDeadOption]
+
+dropDeadOption :: Option
+dropDeadOption = Option.flag [] "drop-dead" "drop references to dead repositories"
+
seek :: [CommandSeek]
-seek = [withNothing start]
+seek = [withFlag dropDeadOption $ \dropdead ->
+ withNothing $ start dropdead]
-start :: CommandStart
-start = do
+start :: Bool -> CommandStart
+start dropdead = do
showStart "forget" "git-annex"
- next $ perform =<< Annex.getState Annex.force
-
-perform :: Bool -> CommandPerform
-perform True = do
now <- liftIO getPOSIXTime
- let ts = addTransition now ForgetGitHistory noTransitions
+ let basets = addTransition now ForgetGitHistory noTransitions
+ let ts = if dropdead
+ then addTransition now ForgetDeadRemotes basets
+ else basets
+ next $ perform ts =<< Annex.getState Annex.force
+
+perform :: Transitions -> Bool -> CommandPerform
+perform ts True = do
recordTransitions Branch.change ts
-- get branch committed before contining with the transition
Branch.update
void $ Branch.performTransitions ts True
next $ return True
-perform False = do
+perform _ False = do
showLongNote "To forget git-annex branch history, you must specify --force. This deletes metadata!"
stop
diff --git a/Logs/Presence.hs b/Logs/Presence.hs
index ec5cec209..516d59618 100644
--- a/Logs/Presence.hs
+++ b/Logs/Presence.hs
@@ -12,36 +12,18 @@
-}
module Logs.Presence (
- LogStatus(..),
- LogLine(LogLine),
+ module X,
addLog,
readLog,
- getLog,
- parseLog,
- showLog,
logNow,
- compactLog,
- currentLog,
- prop_parse_show_log,
+ currentLog
) where
import Data.Time.Clock.POSIX
-import Data.Time
-import System.Locale
-import qualified Data.Map as M
+import Logs.Presence.Pure as X
import Common.Annex
import qualified Annex.Branch
-import Utility.QuickCheck
-
-data LogLine = LogLine {
- date :: POSIXTime,
- status :: LogStatus,
- info :: String
-} deriving (Eq, Show)
-
-data LogStatus = InfoPresent | InfoMissing
- deriving (Eq, Show, Bounded, Enum)
addLog :: FilePath -> LogLine -> Annex ()
addLog file line = Annex.Branch.change file $ \s ->
@@ -52,29 +34,6 @@ addLog file line = Annex.Branch.change file $ \s ->
readLog :: FilePath -> Annex [LogLine]
readLog = parseLog <$$> Annex.Branch.get
-{- Parses a log file. Unparseable lines are ignored. -}
-parseLog :: String -> [LogLine]
-parseLog = mapMaybe parseline . lines
- where
- parseline l = LogLine
- <$> (utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" d)
- <*> parsestatus s
- <*> pure rest
- where
- (d, pastd) = separate (== ' ') l
- (s, rest) = separate (== ' ') pastd
- parsestatus "1" = Just InfoPresent
- parsestatus "0" = Just InfoMissing
- parsestatus _ = Nothing
-
-{- Generates a log file. -}
-showLog :: [LogLine] -> String
-showLog = unlines . map genline
- where
- genline (LogLine d s i) = unwords [show d, genstatus s, i]
- genstatus InfoPresent = "1"
- genstatus InfoMissing = "0"
-
{- Generates a new LogLine with the current date. -}
logNow :: LogStatus -> String -> Annex LogLine
logNow s i = do
@@ -84,39 +43,3 @@ logNow s i = do
{- Reads a log and returns only the info that is still in effect. -}
currentLog :: FilePath -> Annex [String]
currentLog file = map info . filterPresent <$> readLog file
-
-{- Given a log, returns only the info that is are still in effect. -}
-getLog :: String -> [String]
-getLog = map info . filterPresent . parseLog
-
-{- Returns the info from LogLines that are in effect. -}
-filterPresent :: [LogLine] -> [LogLine]
-filterPresent = filter (\l -> InfoPresent == status l) . compactLog
-
-{- Compacts a set of logs, returning a subset that contains the current
- - status. -}
-compactLog :: [LogLine] -> [LogLine]
-compactLog = M.elems . foldr mapLog M.empty
-
-type LogMap = M.Map String LogLine
-
-{- Inserts a log into a map of logs, if the log has better (ie, newer)
- - information than the other logs in the map -}
-mapLog :: LogLine -> LogMap -> LogMap
-mapLog l m
- | better = M.insert i l m
- | otherwise = m
- where
- better = maybe True newer $ M.lookup i m
- newer l' = date l' <= date l
- i = info l
-
-instance Arbitrary LogLine where
- arbitrary = LogLine
- <$> arbitrary
- <*> elements [minBound..maxBound]
- <*> arbitrary `suchThat` ('\n' `notElem`)
-
-prop_parse_show_log :: [LogLine] -> Bool
-prop_parse_show_log l = parseLog (showLog l) == l
-
diff --git a/Logs/Presence/Pure.hs b/Logs/Presence/Pure.hs
new file mode 100644
index 000000000..ffeb78b26
--- /dev/null
+++ b/Logs/Presence/Pure.hs
@@ -0,0 +1,84 @@
+{- git-annex presence log, pure operations
+ -
+ - Copyright 2010-2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Logs.Presence.Pure where
+
+import Data.Time.Clock.POSIX
+import Data.Time
+import System.Locale
+import qualified Data.Map as M
+
+import Common.Annex
+import Utility.QuickCheck
+
+data LogLine = LogLine {
+ date :: POSIXTime,
+ status :: LogStatus,
+ info :: String
+} deriving (Eq, Show)
+
+data LogStatus = InfoPresent | InfoMissing
+ deriving (Eq, Show, Bounded, Enum)
+
+{- Parses a log file. Unparseable lines are ignored. -}
+parseLog :: String -> [LogLine]
+parseLog = mapMaybe parseline . lines
+ where
+ parseline l = LogLine
+ <$> (utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" d)
+ <*> parsestatus s
+ <*> pure rest
+ where
+ (d, pastd) = separate (== ' ') l
+ (s, rest) = separate (== ' ') pastd
+ parsestatus "1" = Just InfoPresent
+ parsestatus "0" = Just InfoMissing
+ parsestatus _ = Nothing
+
+{- Generates a log file. -}
+showLog :: [LogLine] -> String
+showLog = unlines . map genline
+ where
+ genline (LogLine d s i) = unwords [show d, genstatus s, i]
+ genstatus InfoPresent = "1"
+ genstatus InfoMissing = "0"
+
+{- Given a log, returns only the info that is are still in effect. -}
+getLog :: String -> [String]
+getLog = map info . filterPresent . parseLog
+
+{- Returns the info from LogLines that are in effect. -}
+filterPresent :: [LogLine] -> [LogLine]
+filterPresent = filter (\l -> InfoPresent == status l) . compactLog
+
+{- Compacts a set of logs, returning a subset that contains the current
+ - status. -}
+compactLog :: [LogLine] -> [LogLine]
+compactLog = M.elems . foldr mapLog M.empty
+
+type LogMap = M.Map String LogLine
+
+{- Inserts a log into a map of logs, if the log has better (ie, newer)
+ - information than the other logs in the map -}
+mapLog :: LogLine -> LogMap -> LogMap
+mapLog l m
+ | better = M.insert i l m
+ | otherwise = m
+ where
+ better = maybe True newer $ M.lookup i m
+ newer l' = date l' <= date l
+ i = info l
+
+instance Arbitrary LogLine where
+ arbitrary = LogLine
+ <$> arbitrary
+ <*> elements [minBound..maxBound]
+ <*> arbitrary `suchThat` ('\n' `notElem`)
+
+prop_parse_show_log :: [LogLine] -> Bool
+prop_parse_show_log l = parseLog (showLog l) == l
+
diff --git a/Logs/Transitions.hs b/Logs/Transitions.hs
index d4b7d5eb3..783ce5090 100644
--- a/Logs/Transitions.hs
+++ b/Logs/Transitions.hs
@@ -7,16 +7,6 @@
- done that is listed in the remote branch by checking that the local
- branch contains the same transition, with the same or newer start time.
-
- - When a remote branch that has had an transition performed on it
- - becomes available for merging into the local git-annex branch,
- - the transition is first performed on the local branch.
- -
- - When merging a remote branch into the local git-annex branch,
- - all transitions that have been performed on the local branch must also
- - have been performed on the remote branch already. (Or it would be
- - possible to perform the transitions on a fixup branch and merge it,
- - but that would be expensive.)
- -
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
@@ -86,9 +76,6 @@ parseTransitionLine s = TransitionLine <$> pdate ds <*> readish ts
combineTransitions :: [Transitions] -> Transitions
combineTransitions = S.unions
-inTransitions :: Transition -> Transitions -> Bool
-inTransitions t = not . S.null . S.filter (\l -> transition l == t)
-
transitionList :: Transitions -> [Transition]
transitionList = map transition . S.elems
diff --git a/Logs/Trust.hs b/Logs/Trust.hs
index 6c6b33f70..c6f0ad3ab 100644
--- a/Logs/Trust.hs
+++ b/Logs/Trust.hs
@@ -6,6 +6,7 @@
-}
module Logs.Trust (
+ module X,
trustLog,
TrustLevel(..),
trustGet,
@@ -16,8 +17,6 @@ module Logs.Trust (
lookupTrust,
trustMapLoad,
trustMapRaw,
-
- prop_parse_show_TrustLog,
) where
import qualified Data.Map as M
@@ -31,6 +30,7 @@ import Logs
import Logs.UUIDBased
import Remote.List
import qualified Types.Remote
+import Logs.Trust.Pure as X
{- Returns a list of UUIDs that the trustLog indicates have the
- specified trust level.
@@ -94,26 +94,4 @@ trustMapLoad = do
{- Does not include forcetrust or git config values, just those from the
- log file. -}
trustMapRaw :: Annex TrustMap
-trustMapRaw = simpleMap . parseLog (Just . parseTrustLog)
- <$> Annex.Branch.get trustLog
-
-{- The trust.log used to only list trusted repos, without a field for the
- - trust status, which is why this defaults to Trusted. -}
-parseTrustLog :: String -> TrustLevel
-parseTrustLog s = maybe Trusted parse $ headMaybe $ words s
- where
- parse "1" = Trusted
- parse "0" = UnTrusted
- parse "X" = DeadTrusted
- parse _ = SemiTrusted
-
-showTrustLog :: TrustLevel -> String
-showTrustLog Trusted = "1"
-showTrustLog UnTrusted = "0"
-showTrustLog DeadTrusted = "X"
-showTrustLog SemiTrusted = "?"
-
-prop_parse_show_TrustLog :: Bool
-prop_parse_show_TrustLog = all check [minBound .. maxBound]
- where
- check l = parseTrustLog (showTrustLog l) == l
+trustMapRaw = calcTrustMap <$> Annex.Branch.get trustLog
diff --git a/Logs/Trust/Pure.hs b/Logs/Trust/Pure.hs
new file mode 100644
index 000000000..11cfbe056
--- /dev/null
+++ b/Logs/Trust/Pure.hs
@@ -0,0 +1,36 @@
+{- git-annex trust log, pure operations
+ -
+ - Copyright 2010-2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Logs.Trust.Pure where
+
+import Common.Annex
+import Types.TrustLevel
+import Logs.UUIDBased
+
+calcTrustMap :: String -> TrustMap
+calcTrustMap = simpleMap . parseLog (Just . parseTrustLog)
+
+{- The trust.log used to only list trusted repos, without a field for the
+ - trust status, which is why this defaults to Trusted. -}
+parseTrustLog :: String -> TrustLevel
+parseTrustLog s = maybe Trusted parse $ headMaybe $ words s
+ where
+ parse "1" = Trusted
+ parse "0" = UnTrusted
+ parse "X" = DeadTrusted
+ parse _ = SemiTrusted
+
+showTrustLog :: TrustLevel -> String
+showTrustLog Trusted = "1"
+showTrustLog UnTrusted = "0"
+showTrustLog DeadTrusted = "X"
+showTrustLog SemiTrusted = "?"
+
+prop_parse_show_TrustLog :: Bool
+prop_parse_show_TrustLog = all check [minBound .. maxBound]
+ where
+ check l = parseTrustLog (showTrustLog l) == l
diff --git a/debian/changelog b/debian/changelog
index cb6fbee5a..a28c9f187 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,8 @@ git-annex (4.20130828) UNRELEASED; urgency=low
* forget: New command, causes git-annex branch history to be forgotten
in a way that will spread to other clones of the repository.
(As long as they're running this version or newer of git-annex.)
+ * forget --drop-dead: Completely removes mentions of repositories that
+ have been marked as dead from the git-annex branch.
* sync, assistant: Force push of the git-annex branch. Necessary
to ensure it gets pushed to remotes after being rewritten by forget.
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 5fb0ce5a4..269588add 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -482,17 +482,16 @@ subdirectories).
* forget
Causes the git-annex branch to be rewritten, throwing away historical
- data about past locations of files, files that are no longer present on
- any remote, etc. The resulting branch will use less space, but for
- example `git annex log` will not be able to show where files used to
- be located.
+ data about past locations of files. The resulting branch will use less
+ space, but `git annex log` will not be able to show where
+ files used to be located.
- To also prune references to remotes that have been marked as dead,
- specify --dead.
+ To also prune references to repositories that have been marked as dead,
+ specify --drop-dead.
When this rewritten branch is merged into other clones of
the repository, git-annex will automatically perform the same rewriting
- to their local git-annex branch. So the forgetfulness will automatically
+ to their local git-annex branches. So the forgetfulness will automatically
propigate out from its starting point until all repositories running
git-annex have forgotten their old history. (You may need to force
git to push the branch to any git repositories not running git-annex.