diff options
-rw-r--r-- | Annex/Branch.hs | 91 | ||||
-rw-r--r-- | Annex/Branch/Transitions.hs | 53 | ||||
-rw-r--r-- | Command/Forget.hs | 31 | ||||
-rw-r--r-- | Logs/Presence.hs | 83 | ||||
-rw-r--r-- | Logs/Presence/Pure.hs | 84 | ||||
-rw-r--r-- | Logs/Transitions.hs | 13 | ||||
-rw-r--r-- | Logs/Trust.hs | 28 | ||||
-rw-r--r-- | Logs/Trust/Pure.hs | 36 | ||||
-rw-r--r-- | debian/changelog | 2 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 13 |
10 files changed, 279 insertions, 155 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 334b60634..9ee281de9 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -46,8 +46,12 @@ import Annex.CatFile import Annex.Perms import qualified Annex import Utility.Env +import Logs import Logs.Transitions +import Logs.Trust.Pure import Annex.ReplaceFile +import qualified Annex.Queue +import Annex.Branch.Transitions {- Name of the branch that is used to store git-annex's information. -} name :: Git.Ref @@ -194,7 +198,10 @@ get' :: FilePath -> Annex String get' file = go =<< getJournalFile file where go (Just journalcontent) = return journalcontent - go Nothing = withIndex $ L.unpack <$> catFile fullname file + go Nothing = getRaw file + +getRaw :: FilePath -> Annex String +getRaw file = withIndex $ L.unpack <$> catFile fullname file {- Applies a function to modifiy the content of a file. - @@ -272,13 +279,17 @@ commitBranch' branchref message parents = do files :: Annex [FilePath] files = do update - withIndex $ do - bfiles <- inRepo $ Git.Command.pipeNullSplitZombie - [ Params "ls-tree --name-only -r -z" - , Param $ show fullname - ] - jfiles <- getJournalledFiles - return $ jfiles ++ bfiles + (++) + <$> branchFiles + <*> getJournalledFiles + +{- Files in the branch, not including any from journalled changes, + - and without updating the branch. -} +branchFiles :: Annex [FilePath] +branchFiles = withIndex $ inRepo $ Git.Command.pipeNullSplitZombie + [ Params "ls-tree --name-only -r -z" + , Param $ show fullname + ] {- Populates the branch's index file with the current branch contents. - @@ -436,20 +447,60 @@ getIgnoredRefs = S.fromList . mapMaybe Git.Sha.extractSha . lines <$> content - commits it to the branch, or creates a new branch, and returns - the branch's ref. -} performTransitions :: Transitions -> Bool -> Annex Git.Ref -performTransitions ts neednewbranch = withIndex $ do - when (inTransitions ForgetDeadRemotes ts) $ - error "TODO ForgetDeadRemotes transition" - if neednewbranch - then do - committedref <- inRepo $ Git.Branch.commit message fullname [] - setIndexSha committedref - return committedref - else do - ref <- getBranch - commitBranch ref message [fullname] - getBranch +performTransitions ts neednewbranch = do + -- For simplicity & speed, we're going to use the Annex.Queue to + -- update the git-annex branch, while it usually holds changes + -- for the head branch. Flush any such changes. + Annex.Queue.flush + withIndex $ do + run $ mapMaybe getTransitionCalculator $ transitionList ts + Annex.Queue.flush + if neednewbranch + then do + committedref <- inRepo $ Git.Branch.commit message fullname [] + setIndexSha committedref + return committedref + else do + ref <- getBranch + commitBranch ref message [fullname] + getBranch where message | neednewbranch = "new branch for transition " ++ tdesc | otherwise = "continuing transition " ++ tdesc tdesc = show $ map describeTransition $ transitionList ts + + {- The changes to make to the branch are calculated and applied to + - the branch directly, rather than going through the journal, + - which would be innefficient. (And the journal is not designed + - to hold changes to every file in the branch at once.) + - + - When a file in the branch is changed by transition code, + - that value is remembered and fed into the code for subsequent + - transitions. + -} + run [] = noop + run changers = do + trustmap <- calcTrustMap <$> getRaw trustLog + fs <- branchFiles + hasher <- inRepo hashObjectStart + forM_ fs $ \f -> do + content <- getRaw f + apply changers hasher f content trustmap + liftIO $ hashObjectStop hasher + apply [] _ _ _ _ = return () + apply (changer:rest) hasher file content trustmap = + case changer file content trustmap of + RemoveFile -> do + Annex.Queue.addUpdateIndex + =<< inRepo (Git.UpdateIndex.unstageFile file) + -- File is deleted; can't run any other + -- transitions on it. + return () + ChangeFile content' -> do + sha <- inRepo $ hashObject BlobObject content' + Annex.Queue.addUpdateIndex $ Git.UpdateIndex.pureStreamer $ + Git.UpdateIndex.updateIndexLine sha FileBlob (asTopFilePath file) + apply rest hasher file content' trustmap + PreserveFile -> + apply rest hasher file content trustmap diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs new file mode 100644 index 000000000..90002de62 --- /dev/null +++ b/Annex/Branch/Transitions.hs @@ -0,0 +1,53 @@ +{- git-annex branch transitions + - + - Copyright 2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Branch.Transitions ( + FileTransition(..), + getTransitionCalculator +) where + +import Logs +import Logs.Transitions +import Logs.UUIDBased as UUIDBased +import Logs.Presence.Pure as Presence +import Types.TrustLevel +import Types.UUID + +import qualified Data.Map as M + +data FileTransition + = ChangeFile String + | RemoveFile + | PreserveFile + +type TransitionCalculator = FilePath -> String -> TrustMap -> FileTransition + +getTransitionCalculator :: Transition -> Maybe TransitionCalculator +getTransitionCalculator ForgetGitHistory = Nothing +getTransitionCalculator ForgetDeadRemotes = Just dropDead + +dropDead :: FilePath -> String -> TrustMap -> FileTransition +dropDead f content trustmap = case getLogVariety f of + Just UUIDBasedLog -> ChangeFile $ + UUIDBased.showLog id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLog Just content + Just (PresenceLog _) -> + let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content + in if null newlog + then RemoveFile + else ChangeFile $ Presence.showLog newlog + Nothing -> PreserveFile + +dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String +dropDeadFromUUIDBasedLog trustmap = M.filterWithKey $ notDead trustmap . const + +{- Presence logs can contain UUIDs or other values. Any line that matches + - a dead uuid is dropped; any other values are passed through. -} +dropDeadFromPresenceLog :: TrustMap -> [Presence.LogLine] -> [Presence.LogLine] +dropDeadFromPresenceLog trustmap = filter $ notDead trustmap (toUUID . Presence.info) + +notDead :: TrustMap -> (v -> UUID) -> v -> Bool +notDead trustmap a v = M.findWithDefault SemiTrusted (a v) trustmap /= DeadTrusted diff --git a/Command/Forget.hs b/Command/Forget.hs index e405a9918..d216ae3ca 100644 --- a/Command/Forget.hs +++ b/Command/Forget.hs @@ -12,30 +12,41 @@ import Command import qualified Annex.Branch as Branch import Logs.Transitions import qualified Annex +import qualified Option import Data.Time.Clock.POSIX def :: [Command] -def = [command "forget" paramNothing seek +def = [withOptions forgetOptions $ command "forget" paramNothing seek SectionMaintenance "prune git-annex branch history"] +forgetOptions :: [Option] +forgetOptions = [dropDeadOption] + +dropDeadOption :: Option +dropDeadOption = Option.flag [] "drop-dead" "drop references to dead repositories" + seek :: [CommandSeek] -seek = [withNothing start] +seek = [withFlag dropDeadOption $ \dropdead -> + withNothing $ start dropdead] -start :: CommandStart -start = do +start :: Bool -> CommandStart +start dropdead = do showStart "forget" "git-annex" - next $ perform =<< Annex.getState Annex.force - -perform :: Bool -> CommandPerform -perform True = do now <- liftIO getPOSIXTime - let ts = addTransition now ForgetGitHistory noTransitions + let basets = addTransition now ForgetGitHistory noTransitions + let ts = if dropdead + then addTransition now ForgetDeadRemotes basets + else basets + next $ perform ts =<< Annex.getState Annex.force + +perform :: Transitions -> Bool -> CommandPerform +perform ts True = do recordTransitions Branch.change ts -- get branch committed before contining with the transition Branch.update void $ Branch.performTransitions ts True next $ return True -perform False = do +perform _ False = do showLongNote "To forget git-annex branch history, you must specify --force. This deletes metadata!" stop diff --git a/Logs/Presence.hs b/Logs/Presence.hs index ec5cec209..516d59618 100644 --- a/Logs/Presence.hs +++ b/Logs/Presence.hs @@ -12,36 +12,18 @@ -} module Logs.Presence ( - LogStatus(..), - LogLine(LogLine), + module X, addLog, readLog, - getLog, - parseLog, - showLog, logNow, - compactLog, - currentLog, - prop_parse_show_log, + currentLog ) where import Data.Time.Clock.POSIX -import Data.Time -import System.Locale -import qualified Data.Map as M +import Logs.Presence.Pure as X import Common.Annex import qualified Annex.Branch -import Utility.QuickCheck - -data LogLine = LogLine { - date :: POSIXTime, - status :: LogStatus, - info :: String -} deriving (Eq, Show) - -data LogStatus = InfoPresent | InfoMissing - deriving (Eq, Show, Bounded, Enum) addLog :: FilePath -> LogLine -> Annex () addLog file line = Annex.Branch.change file $ \s -> @@ -52,29 +34,6 @@ addLog file line = Annex.Branch.change file $ \s -> readLog :: FilePath -> Annex [LogLine] readLog = parseLog <$$> Annex.Branch.get -{- Parses a log file. Unparseable lines are ignored. -} -parseLog :: String -> [LogLine] -parseLog = mapMaybe parseline . lines - where - parseline l = LogLine - <$> (utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" d) - <*> parsestatus s - <*> pure rest - where - (d, pastd) = separate (== ' ') l - (s, rest) = separate (== ' ') pastd - parsestatus "1" = Just InfoPresent - parsestatus "0" = Just InfoMissing - parsestatus _ = Nothing - -{- Generates a log file. -} -showLog :: [LogLine] -> String -showLog = unlines . map genline - where - genline (LogLine d s i) = unwords [show d, genstatus s, i] - genstatus InfoPresent = "1" - genstatus InfoMissing = "0" - {- Generates a new LogLine with the current date. -} logNow :: LogStatus -> String -> Annex LogLine logNow s i = do @@ -84,39 +43,3 @@ logNow s i = do {- Reads a log and returns only the info that is still in effect. -} currentLog :: FilePath -> Annex [String] currentLog file = map info . filterPresent <$> readLog file - -{- Given a log, returns only the info that is are still in effect. -} -getLog :: String -> [String] -getLog = map info . filterPresent . parseLog - -{- Returns the info from LogLines that are in effect. -} -filterPresent :: [LogLine] -> [LogLine] -filterPresent = filter (\l -> InfoPresent == status l) . compactLog - -{- Compacts a set of logs, returning a subset that contains the current - - status. -} -compactLog :: [LogLine] -> [LogLine] -compactLog = M.elems . foldr mapLog M.empty - -type LogMap = M.Map String LogLine - -{- Inserts a log into a map of logs, if the log has better (ie, newer) - - information than the other logs in the map -} -mapLog :: LogLine -> LogMap -> LogMap -mapLog l m - | better = M.insert i l m - | otherwise = m - where - better = maybe True newer $ M.lookup i m - newer l' = date l' <= date l - i = info l - -instance Arbitrary LogLine where - arbitrary = LogLine - <$> arbitrary - <*> elements [minBound..maxBound] - <*> arbitrary `suchThat` ('\n' `notElem`) - -prop_parse_show_log :: [LogLine] -> Bool -prop_parse_show_log l = parseLog (showLog l) == l - diff --git a/Logs/Presence/Pure.hs b/Logs/Presence/Pure.hs new file mode 100644 index 000000000..ffeb78b26 --- /dev/null +++ b/Logs/Presence/Pure.hs @@ -0,0 +1,84 @@ +{- git-annex presence log, pure operations + - + - Copyright 2010-2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Presence.Pure where + +import Data.Time.Clock.POSIX +import Data.Time +import System.Locale +import qualified Data.Map as M + +import Common.Annex +import Utility.QuickCheck + +data LogLine = LogLine { + date :: POSIXTime, + status :: LogStatus, + info :: String +} deriving (Eq, Show) + +data LogStatus = InfoPresent | InfoMissing + deriving (Eq, Show, Bounded, Enum) + +{- Parses a log file. Unparseable lines are ignored. -} +parseLog :: String -> [LogLine] +parseLog = mapMaybe parseline . lines + where + parseline l = LogLine + <$> (utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" d) + <*> parsestatus s + <*> pure rest + where + (d, pastd) = separate (== ' ') l + (s, rest) = separate (== ' ') pastd + parsestatus "1" = Just InfoPresent + parsestatus "0" = Just InfoMissing + parsestatus _ = Nothing + +{- Generates a log file. -} +showLog :: [LogLine] -> String +showLog = unlines . map genline + where + genline (LogLine d s i) = unwords [show d, genstatus s, i] + genstatus InfoPresent = "1" + genstatus InfoMissing = "0" + +{- Given a log, returns only the info that is are still in effect. -} +getLog :: String -> [String] +getLog = map info . filterPresent . parseLog + +{- Returns the info from LogLines that are in effect. -} +filterPresent :: [LogLine] -> [LogLine] +filterPresent = filter (\l -> InfoPresent == status l) . compactLog + +{- Compacts a set of logs, returning a subset that contains the current + - status. -} +compactLog :: [LogLine] -> [LogLine] +compactLog = M.elems . foldr mapLog M.empty + +type LogMap = M.Map String LogLine + +{- Inserts a log into a map of logs, if the log has better (ie, newer) + - information than the other logs in the map -} +mapLog :: LogLine -> LogMap -> LogMap +mapLog l m + | better = M.insert i l m + | otherwise = m + where + better = maybe True newer $ M.lookup i m + newer l' = date l' <= date l + i = info l + +instance Arbitrary LogLine where + arbitrary = LogLine + <$> arbitrary + <*> elements [minBound..maxBound] + <*> arbitrary `suchThat` ('\n' `notElem`) + +prop_parse_show_log :: [LogLine] -> Bool +prop_parse_show_log l = parseLog (showLog l) == l + diff --git a/Logs/Transitions.hs b/Logs/Transitions.hs index d4b7d5eb3..783ce5090 100644 --- a/Logs/Transitions.hs +++ b/Logs/Transitions.hs @@ -7,16 +7,6 @@ - done that is listed in the remote branch by checking that the local - branch contains the same transition, with the same or newer start time. - - - When a remote branch that has had an transition performed on it - - becomes available for merging into the local git-annex branch, - - the transition is first performed on the local branch. - - - - When merging a remote branch into the local git-annex branch, - - all transitions that have been performed on the local branch must also - - have been performed on the remote branch already. (Or it would be - - possible to perform the transitions on a fixup branch and merge it, - - but that would be expensive.) - - - Copyright 2013 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. @@ -86,9 +76,6 @@ parseTransitionLine s = TransitionLine <$> pdate ds <*> readish ts combineTransitions :: [Transitions] -> Transitions combineTransitions = S.unions -inTransitions :: Transition -> Transitions -> Bool -inTransitions t = not . S.null . S.filter (\l -> transition l == t) - transitionList :: Transitions -> [Transition] transitionList = map transition . S.elems diff --git a/Logs/Trust.hs b/Logs/Trust.hs index 6c6b33f70..c6f0ad3ab 100644 --- a/Logs/Trust.hs +++ b/Logs/Trust.hs @@ -6,6 +6,7 @@ -} module Logs.Trust ( + module X, trustLog, TrustLevel(..), trustGet, @@ -16,8 +17,6 @@ module Logs.Trust ( lookupTrust, trustMapLoad, trustMapRaw, - - prop_parse_show_TrustLog, ) where import qualified Data.Map as M @@ -31,6 +30,7 @@ import Logs import Logs.UUIDBased import Remote.List import qualified Types.Remote +import Logs.Trust.Pure as X {- Returns a list of UUIDs that the trustLog indicates have the - specified trust level. @@ -94,26 +94,4 @@ trustMapLoad = do {- Does not include forcetrust or git config values, just those from the - log file. -} trustMapRaw :: Annex TrustMap -trustMapRaw = simpleMap . parseLog (Just . parseTrustLog) - <$> Annex.Branch.get trustLog - -{- The trust.log used to only list trusted repos, without a field for the - - trust status, which is why this defaults to Trusted. -} -parseTrustLog :: String -> TrustLevel -parseTrustLog s = maybe Trusted parse $ headMaybe $ words s - where - parse "1" = Trusted - parse "0" = UnTrusted - parse "X" = DeadTrusted - parse _ = SemiTrusted - -showTrustLog :: TrustLevel -> String -showTrustLog Trusted = "1" -showTrustLog UnTrusted = "0" -showTrustLog DeadTrusted = "X" -showTrustLog SemiTrusted = "?" - -prop_parse_show_TrustLog :: Bool -prop_parse_show_TrustLog = all check [minBound .. maxBound] - where - check l = parseTrustLog (showTrustLog l) == l +trustMapRaw = calcTrustMap <$> Annex.Branch.get trustLog diff --git a/Logs/Trust/Pure.hs b/Logs/Trust/Pure.hs new file mode 100644 index 000000000..11cfbe056 --- /dev/null +++ b/Logs/Trust/Pure.hs @@ -0,0 +1,36 @@ +{- git-annex trust log, pure operations + - + - Copyright 2010-2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Trust.Pure where + +import Common.Annex +import Types.TrustLevel +import Logs.UUIDBased + +calcTrustMap :: String -> TrustMap +calcTrustMap = simpleMap . parseLog (Just . parseTrustLog) + +{- The trust.log used to only list trusted repos, without a field for the + - trust status, which is why this defaults to Trusted. -} +parseTrustLog :: String -> TrustLevel +parseTrustLog s = maybe Trusted parse $ headMaybe $ words s + where + parse "1" = Trusted + parse "0" = UnTrusted + parse "X" = DeadTrusted + parse _ = SemiTrusted + +showTrustLog :: TrustLevel -> String +showTrustLog Trusted = "1" +showTrustLog UnTrusted = "0" +showTrustLog DeadTrusted = "X" +showTrustLog SemiTrusted = "?" + +prop_parse_show_TrustLog :: Bool +prop_parse_show_TrustLog = all check [minBound .. maxBound] + where + check l = parseTrustLog (showTrustLog l) == l diff --git a/debian/changelog b/debian/changelog index cb6fbee5a..a28c9f187 100644 --- a/debian/changelog +++ b/debian/changelog @@ -3,6 +3,8 @@ git-annex (4.20130828) UNRELEASED; urgency=low * forget: New command, causes git-annex branch history to be forgotten in a way that will spread to other clones of the repository. (As long as they're running this version or newer of git-annex.) + * forget --drop-dead: Completely removes mentions of repositories that + have been marked as dead from the git-annex branch. * sync, assistant: Force push of the git-annex branch. Necessary to ensure it gets pushed to remotes after being rewritten by forget. diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 5fb0ce5a4..269588add 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -482,17 +482,16 @@ subdirectories). * forget Causes the git-annex branch to be rewritten, throwing away historical - data about past locations of files, files that are no longer present on - any remote, etc. The resulting branch will use less space, but for - example `git annex log` will not be able to show where files used to - be located. + data about past locations of files. The resulting branch will use less + space, but `git annex log` will not be able to show where + files used to be located. - To also prune references to remotes that have been marked as dead, - specify --dead. + To also prune references to repositories that have been marked as dead, + specify --drop-dead. When this rewritten branch is merged into other clones of the repository, git-annex will automatically perform the same rewriting - to their local git-annex branch. So the forgetfulness will automatically + to their local git-annex branches. So the forgetfulness will automatically propigate out from its starting point until all repositories running git-annex have forgotten their old history. (You may need to force git to push the branch to any git repositories not running git-annex. |