diff options
author | Joey Hess <joey@kitenet.net> | 2013-08-31 17:38:33 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2013-08-31 17:51:13 -0400 |
commit | 2afb4de6c02a4515f8b1bf6e24a32cbde7eae5a8 (patch) | |
tree | 22f667488fe21140622dd235fed81d03c1b747de /Annex | |
parent | 2b83639fac92307deeaa7b1bc75a0c71f35e5b1e (diff) |
forget --drop-dead: Completely removes mentions of repositories that have been marked as dead from the git-annex branch.
Wrote nice pure transition calculator, and ugly code to stage its results
into the git-annex branch. Also had to split up several Log modules
that Annex.Branch needed to use, but that themselves used Annex.Branch.
The transition calculator is limited to looking at and changing one file at
a time. While this made the implementation relatively easy, it precludes
transitions that do stuff like deleting old url log files for keys that are
being removed because they are no longer present anywhere.
Diffstat (limited to 'Annex')
-rw-r--r-- | Annex/Branch.hs | 91 | ||||
-rw-r--r-- | Annex/Branch/Transitions.hs | 53 |
2 files changed, 124 insertions, 20 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 334b60634..9ee281de9 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -46,8 +46,12 @@ import Annex.CatFile import Annex.Perms import qualified Annex import Utility.Env +import Logs import Logs.Transitions +import Logs.Trust.Pure import Annex.ReplaceFile +import qualified Annex.Queue +import Annex.Branch.Transitions {- Name of the branch that is used to store git-annex's information. -} name :: Git.Ref @@ -194,7 +198,10 @@ get' :: FilePath -> Annex String get' file = go =<< getJournalFile file where go (Just journalcontent) = return journalcontent - go Nothing = withIndex $ L.unpack <$> catFile fullname file + go Nothing = getRaw file + +getRaw :: FilePath -> Annex String +getRaw file = withIndex $ L.unpack <$> catFile fullname file {- Applies a function to modifiy the content of a file. - @@ -272,13 +279,17 @@ commitBranch' branchref message parents = do files :: Annex [FilePath] files = do update - withIndex $ do - bfiles <- inRepo $ Git.Command.pipeNullSplitZombie - [ Params "ls-tree --name-only -r -z" - , Param $ show fullname - ] - jfiles <- getJournalledFiles - return $ jfiles ++ bfiles + (++) + <$> branchFiles + <*> getJournalledFiles + +{- Files in the branch, not including any from journalled changes, + - and without updating the branch. -} +branchFiles :: Annex [FilePath] +branchFiles = withIndex $ inRepo $ Git.Command.pipeNullSplitZombie + [ Params "ls-tree --name-only -r -z" + , Param $ show fullname + ] {- Populates the branch's index file with the current branch contents. - @@ -436,20 +447,60 @@ getIgnoredRefs = S.fromList . mapMaybe Git.Sha.extractSha . lines <$> content - commits it to the branch, or creates a new branch, and returns - the branch's ref. -} performTransitions :: Transitions -> Bool -> Annex Git.Ref -performTransitions ts neednewbranch = withIndex $ do - when (inTransitions ForgetDeadRemotes ts) $ - error "TODO ForgetDeadRemotes transition" - if neednewbranch - then do - committedref <- inRepo $ Git.Branch.commit message fullname [] - setIndexSha committedref - return committedref - else do - ref <- getBranch - commitBranch ref message [fullname] - getBranch +performTransitions ts neednewbranch = do + -- For simplicity & speed, we're going to use the Annex.Queue to + -- update the git-annex branch, while it usually holds changes + -- for the head branch. Flush any such changes. + Annex.Queue.flush + withIndex $ do + run $ mapMaybe getTransitionCalculator $ transitionList ts + Annex.Queue.flush + if neednewbranch + then do + committedref <- inRepo $ Git.Branch.commit message fullname [] + setIndexSha committedref + return committedref + else do + ref <- getBranch + commitBranch ref message [fullname] + getBranch where message | neednewbranch = "new branch for transition " ++ tdesc | otherwise = "continuing transition " ++ tdesc tdesc = show $ map describeTransition $ transitionList ts + + {- The changes to make to the branch are calculated and applied to + - the branch directly, rather than going through the journal, + - which would be innefficient. (And the journal is not designed + - to hold changes to every file in the branch at once.) + - + - When a file in the branch is changed by transition code, + - that value is remembered and fed into the code for subsequent + - transitions. + -} + run [] = noop + run changers = do + trustmap <- calcTrustMap <$> getRaw trustLog + fs <- branchFiles + hasher <- inRepo hashObjectStart + forM_ fs $ \f -> do + content <- getRaw f + apply changers hasher f content trustmap + liftIO $ hashObjectStop hasher + apply [] _ _ _ _ = return () + apply (changer:rest) hasher file content trustmap = + case changer file content trustmap of + RemoveFile -> do + Annex.Queue.addUpdateIndex + =<< inRepo (Git.UpdateIndex.unstageFile file) + -- File is deleted; can't run any other + -- transitions on it. + return () + ChangeFile content' -> do + sha <- inRepo $ hashObject BlobObject content' + Annex.Queue.addUpdateIndex $ Git.UpdateIndex.pureStreamer $ + Git.UpdateIndex.updateIndexLine sha FileBlob (asTopFilePath file) + apply rest hasher file content' trustmap + PreserveFile -> + apply rest hasher file content trustmap diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs new file mode 100644 index 000000000..90002de62 --- /dev/null +++ b/Annex/Branch/Transitions.hs @@ -0,0 +1,53 @@ +{- git-annex branch transitions + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Branch.Transitions ( + FileTransition(..), + getTransitionCalculator +) where + +import Logs +import Logs.Transitions +import Logs.UUIDBased as UUIDBased +import Logs.Presence.Pure as Presence +import Types.TrustLevel +import Types.UUID + +import qualified Data.Map as M + +data FileTransition + = ChangeFile String + | RemoveFile + | PreserveFile + +type TransitionCalculator = FilePath -> String -> TrustMap -> FileTransition + +getTransitionCalculator :: Transition -> Maybe TransitionCalculator +getTransitionCalculator ForgetGitHistory = Nothing +getTransitionCalculator ForgetDeadRemotes = Just dropDead + +dropDead :: FilePath -> String -> TrustMap -> FileTransition +dropDead f content trustmap = case getLogVariety f of + Just UUIDBasedLog -> ChangeFile $ + UUIDBased.showLog id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLog Just content + Just (PresenceLog _) -> + let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content + in if null newlog + then RemoveFile + else ChangeFile $ Presence.showLog newlog + Nothing -> PreserveFile + +dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String +dropDeadFromUUIDBasedLog trustmap = M.filterWithKey $ notDead trustmap . const + +{- Presence logs can contain UUIDs or other values. Any line that matches + - a dead uuid is dropped; any other values are passed through. -} +dropDeadFromPresenceLog :: TrustMap -> [Presence.LogLine] -> [Presence.LogLine] +dropDeadFromPresenceLog trustmap = filter $ notDead trustmap (toUUID . Presence.info) + +notDead :: TrustMap -> (v -> UUID) -> v -> Bool +notDead trustmap a v = M.findWithDefault SemiTrusted (a v) trustmap /= DeadTrusted |