From 0ffe5408ae1b396453f080bef2858542317daf23 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 28 Aug 2013 15:57:42 -0400 Subject: untested transition detection on merging, and transition running code --- Annex/Branch.hs | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 102 insertions(+), 10 deletions(-) (limited to 'Annex') diff --git a/Annex/Branch.hs b/Annex/Branch.hs index bc3736a9a..fa4b0265d 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -1,6 +1,6 @@ {- management of the git-annex branch - - - Copyright 2011-2012 Joey Hess + - Copyright 2011-2013 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -22,9 +22,12 @@ module Annex.Branch ( commit, files, withIndex, + performTransitions, ) where import qualified Data.ByteString.Lazy.Char8 as L +import qualified Data.Set as S +import qualified Data.Map as M import Common.Annex import Annex.BranchState @@ -32,6 +35,7 @@ import Annex.Journal import qualified Git import qualified Git.Command import qualified Git.Ref +import qualified Git.Sha import qualified Git.Branch import qualified Git.UnionMerge import qualified Git.UpdateIndex @@ -42,6 +46,8 @@ import Annex.CatFile import Annex.Perms import qualified Annex import Utility.Env +import Logs.Transitions +import Annex.ReplaceFile {- Name of the branch that is used to store git-annex's information. -} name :: Git.Ref @@ -110,6 +116,9 @@ forceUpdate = updateTo =<< siblingBranches - later get staged, and might overwrite changes made during the merge. - This is only done if some of the Refs do need to be merged. - + - Also handles performing any Transitions that have not yet been + - performed, in either the local branch, or the Refs. + - - Returns True if any refs were merged in, False otherwise. -} updateTo :: [(Git.Ref, Git.Branch)] -> Annex Bool @@ -117,7 +126,8 @@ updateTo pairs = do -- ensure branch exists, and get its current ref branchref <- getBranch dirty <- journalDirty - (refs, branches) <- unzip <$> filterM isnewer pairs + ignoredrefs <- getIgnoredRefs + (refs, branches) <- unzip <$> filterM (isnewer ignoredrefs) pairs if null refs {- Even when no refs need to be merged, the index - may still be updated if the branch has gotten ahead @@ -132,7 +142,9 @@ updateTo pairs = do else lockJournal $ go branchref dirty refs branches return $ not $ null refs where - isnewer (r, _) = inRepo $ Git.Branch.changed fullname r + isnewer ignoredrefs (r, _) + | S.member r ignoredrefs = return False + | otherwise = inRepo $ Git.Branch.changed fullname r go branchref dirty refs branches = withIndex $ do cleanjournal <- if dirty then stageJournal else return noop let merge_desc = if null branches @@ -140,16 +152,23 @@ updateTo pairs = do else "merging " ++ unwords (map Git.Ref.describe branches) ++ " into " ++ show name + localtransitions <- parseTransitionsStrictly "local" + <$> getStale transitionsLog unless (null branches) $ do showSideAction merge_desc mergeIndex refs - ff <- if dirty - then return False - else inRepo $ Git.Branch.fastForward fullname refs - if ff - then updateIndex branchref - else commitBranch branchref merge_desc - (nub $ fullname:refs) + let commitrefs = nub $ fullname:refs + transitioned <- handleTransitions localtransitions commitrefs + case transitioned of + Nothing -> do + ff <- if dirty + then return False + else inRepo $ Git.Branch.fastForward fullname refs + if ff + then updateIndex branchref + else commitBranch branchref merge_desc commitrefs + Just (branchref', commitrefs') -> + commitBranch branchref' merge_desc commitrefs' liftIO cleanjournal {- Gets the content of a file, which may be in the journal, or in the index @@ -361,3 +380,76 @@ stageJournal = withIndex $ do sha <- hashFile h path streamer $ Git.UpdateIndex.updateIndexLine sha FileBlob (asTopFilePath $ fileJournal file) + +{- This is run after the refs have been merged into the index, + - but before the result is committed to the branch. + - Which is why it's passed the contents of the local branches's + - transition log before that merge took place. + - + - When the refs contain transitions that have not yet been done locally, + - the transitions are performed on the index, and a new branch + - is created from the result, and returned. + - + - When there are transitions recorded locally that have not been done + - to the remote refs, the transitions are performed in the index, + - and the existing branch is returned. In this case, the untransitioned + - remote refs cannot be merged into the branch (since transitions + - throw away history), so none of them are included in the returned + - list of refs, and they are added to the list of refs to ignore, + - to avoid re-merging content from them again. + -} +handleTransitions :: Transitions -> [Git.Ref] -> Annex (Maybe (Git.Branch, [Git.Ref])) +handleTransitions localts refs = do + m <- M.fromList <$> mapM getreftransition refs + let remotets = M.elems m + if all (localts ==) remotets + then return Nothing + else do + let allts = combineTransitions (localts:remotets) + let (transitionedrefs, untransitionedrefs) = + partition (\r -> M.lookup r m == Just allts) refs + transitionedbranch <- performTransitions allts (localts /= allts) + ignoreRefs untransitionedrefs + return $ Just (transitionedbranch, transitionedrefs) + where + getreftransition ref = do + ts <- parseTransitionsStrictly "remote" . L.unpack + <$> catFile ref transitionsLog + return (ref, ts) + +ignoreRefs :: [Git.Ref] -> Annex () +ignoreRefs rs = do + old <- getIgnoredRefs + let s = S.unions [old, S.fromList rs] + f <- fromRepo gitAnnexIgnoredRefs + replaceFile f $ \tmp -> liftIO $ writeFile tmp $ + unlines $ map show $ S.elems s + +getIgnoredRefs :: Annex (S.Set Git.Ref) +getIgnoredRefs = S.fromList . mapMaybe Git.Sha.extractSha . lines <$> content + where + content = do + f <- fromRepo gitAnnexIgnoredRefs + liftIO $ catchDefaultIO "" $ readFile f + +{- Performs the specified transitions on the contents of the index file, + - commits it to the branch, or creates a new branch, and returns + - the branch's ref. -} +performTransitions :: Transitions -> Bool -> Annex Git.Ref +performTransitions ts neednewbranch = withIndex $ do + when (inTransitions ForgetDeadRemotes ts) $ + error "TODO ForgetDeadRemotes transition" + if neednewbranch + then do + committedref <- inRepo $ Git.Branch.commit message fullname [] + setIndexSha committedref + return committedref + else do + ref <- getBranch + commitBranch ref message [fullname] + getBranch + where + message + | neednewbranch = "new branch for transition " ++ tdesc + | otherwise = "continuing transition " ++ tdesc + tdesc = show $ map describeTransition $ transitionList ts -- cgit v1.2.3 From 69bfbd94004d193050e0fe0827f380bdd1ed5644 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 28 Aug 2013 16:38:03 -0400 Subject: add forget command Works, more or less. --dead is not implemented, and so far a new branch is made, but keys no longer present anywhere are not scrubbed. git annex sync fails to push the synced/git-annex branch after a forget, because it's not a fast-forward of the existing synced branch. Could be fixed by making git-annex sync use assistant-style sync branches. --- Annex/Branch.hs | 1 + Command/Forget.hs | 41 +++++++++++++++++++++++++++++++++++++++++ GitAnnex.hs | 2 ++ Logs/Transitions.hs | 10 ++++++---- doc/git-annex.mdwn | 2 +- 5 files changed, 51 insertions(+), 5 deletions(-) create mode 100644 Command/Forget.hs (limited to 'Annex') diff --git a/Annex/Branch.hs b/Annex/Branch.hs index fa4b0265d..5af6b6be9 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -402,6 +402,7 @@ handleTransitions :: Transitions -> [Git.Ref] -> Annex (Maybe (Git.Branch, [Git. handleTransitions localts refs = do m <- M.fromList <$> mapM getreftransition refs let remotets = M.elems m + liftIO $ print ("transitions", localts, remotets) if all (localts ==) remotets then return Nothing else do diff --git a/Command/Forget.hs b/Command/Forget.hs new file mode 100644 index 000000000..e405a9918 --- /dev/null +++ b/Command/Forget.hs @@ -0,0 +1,41 @@ +{- git-annex command + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Forget where + +import Common.Annex +import Command +import qualified Annex.Branch as Branch +import Logs.Transitions +import qualified Annex + +import Data.Time.Clock.POSIX + +def :: [Command] +def = [command "forget" paramNothing seek + SectionMaintenance "prune git-annex branch history"] + +seek :: [CommandSeek] +seek = [withNothing start] + +start :: CommandStart +start = do + showStart "forget" "git-annex" + next $ perform =<< Annex.getState Annex.force + +perform :: Bool -> CommandPerform +perform True = do + now <- liftIO getPOSIXTime + let ts = addTransition now ForgetGitHistory noTransitions + recordTransitions Branch.change ts + -- get branch committed before contining with the transition + Branch.update + void $ Branch.performTransitions ts True + next $ return True +perform False = do + showLongNote "To forget git-annex branch history, you must specify --force. This deletes metadata!" + stop diff --git a/GitAnnex.hs b/GitAnnex.hs index 05565e643..1212edf9f 100644 --- a/GitAnnex.hs +++ b/GitAnnex.hs @@ -67,6 +67,7 @@ import qualified Command.Map import qualified Command.Direct import qualified Command.Indirect import qualified Command.Upgrade +import qualified Command.Forget import qualified Command.Version import qualified Command.Help #ifdef WITH_ASSISTANT @@ -139,6 +140,7 @@ cmds = concat , Command.Direct.def , Command.Indirect.def , Command.Upgrade.def + , Command.Forget.def , Command.Version.def , Command.Help.def #ifdef WITH_ASSISTANT diff --git a/Logs/Transitions.hs b/Logs/Transitions.hs index 41f4b2635..d4b7d5eb3 100644 --- a/Logs/Transitions.hs +++ b/Logs/Transitions.hs @@ -50,6 +50,9 @@ describeTransition :: Transition -> String describeTransition ForgetGitHistory = "forget git history" describeTransition ForgetDeadRemotes = "forget dead remotes" +noTransitions :: Transitions +noTransitions = S.empty + addTransition :: POSIXTime -> Transition -> Transitions -> Transitions addTransition ts t = S.insert $ TransitionLine ts t @@ -91,8 +94,7 @@ transitionList = map transition . S.elems {- Typically ran with Annex.Branch.change, but we can't import Annex.Branch - here since it depends on this module. -} -recordTransition :: (FilePath -> (String -> String) -> Annex ()) -> Transition -> Annex () -recordTransition changer o = do - t <- liftIO getPOSIXTime +recordTransitions :: (FilePath -> (String -> String) -> Annex ()) -> Transitions -> Annex () +recordTransitions changer t = do changer transitionsLog $ - showTransitions . addTransition t o . parseTransitionsStrictly "local" + showTransitions . S.union t . parseTransitionsStrictly "local" diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 72e376d64..5fb0ce5a4 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -488,7 +488,7 @@ subdirectories). be located. To also prune references to remotes that have been marked as dead, - specify --forget-dead. + specify --dead. When this rewritten branch is merged into other clones of the repository, git-annex will automatically perform the same rewriting -- cgit v1.2.3 From 74c1d2ec3e6693adf1b2233f984059da02400feb Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 29 Aug 2013 13:31:29 -0400 Subject: use --force in taggedPush This should make the assistant force update its tagged push branch after a transition like git annex forget. --- Annex/TaggedPush.hs | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'Annex') diff --git a/Annex/TaggedPush.hs b/Annex/TaggedPush.hs index 44a1a0eb0..2a5f823fd 100644 --- a/Annex/TaggedPush.hs +++ b/Annex/TaggedPush.hs @@ -49,6 +49,10 @@ fromTaggedBranch b = case split "/" $ show b of taggedPush :: UUID -> Maybe String -> Git.Ref -> Remote -> Git.Repo -> IO Bool taggedPush u info branch remote = Git.Command.runBool [ Param "push" + -- This is safe because we "own" the tagged branch we're pushing; + -- it has no other writers. Ensures it is pushed even if it has + -- been rewritten by a transition. + , Param "--force" , Param $ Remote.name remote , Param $ refspec Annex.Branch.name , Param $ refspec branch -- cgit v1.2.3 From 6a67b257f0083bb944cd132ac4a515fd4fb99a47 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 29 Aug 2013 14:15:32 -0400 Subject: sync, assistant: Force push of the git-annex branch. Necessary to ensure it gets pushed to remotes after being rewritten by forget. See inline rationalles for why I think this is safe! --- Annex/TaggedPush.hs | 10 +++++----- Command/Sync.hs | 42 +++++++++++++++++++++++++++++------------- Git/Branch.hs | 4 ++++ debian/changelog | 10 ++++++++++ 4 files changed, 48 insertions(+), 18 deletions(-) (limited to 'Annex') diff --git a/Annex/TaggedPush.hs b/Annex/TaggedPush.hs index 2a5f823fd..039dc0e17 100644 --- a/Annex/TaggedPush.hs +++ b/Annex/TaggedPush.hs @@ -13,6 +13,7 @@ import qualified Annex.Branch import qualified Git import qualified Git.Ref import qualified Git.Command +import qualified Git.Branch import Utility.Base64 {- Converts a git branch into a branch that is tagged with a UUID, typically @@ -49,12 +50,11 @@ fromTaggedBranch b = case split "/" $ show b of taggedPush :: UUID -> Maybe String -> Git.Ref -> Remote -> Git.Repo -> IO Bool taggedPush u info branch remote = Git.Command.runBool [ Param "push" - -- This is safe because we "own" the tagged branch we're pushing; - -- it has no other writers. Ensures it is pushed even if it has - -- been rewritten by a transition. - , Param "--force" , Param $ Remote.name remote - , Param $ refspec Annex.Branch.name + {- Using forcePush here is safe because we "own" the tagged branch + - we're pushing; it has no other writers. Ensures it is pushed + - even if it has been rewritten by a transition. -} + , Param $ Git.Branch.forcePush $ refspec Annex.Branch.name , Param $ refspec branch ] where diff --git a/Command/Sync.hs b/Command/Sync.hs index 551c2fa69..567e3146b 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -167,29 +167,45 @@ pushRemote remote branch = go =<< needpush showOutput inRepo $ pushBranch remote branch -{- If the remote is a bare git repository, it's best to push the branch - - directly to it. On the other hand, if it's not bare, pushing to the - - checked out branch will fail, and this is why we use the syncBranch. +{- Pushes a regular branch like master to a remote. Also pushes the git-annex + - branch. + - + - If the remote is a bare git repository, it's best to push the regular + - branch directly to it, so that cloning/pulling will get it. + - On the other hand, if it's not bare, pushing to the checked out branch + - will fail, and this is why we push to its syncBranch. - - Git offers no way to tell if a remote is bare or not, so both methods - are tried. - - The direct push is likely to spew an ugly error message, so stderr is - - elided. Since progress is output to stderr too, the sync push is done - - first, and actually sends the data. Then the direct push is tried, - - with stderr discarded, to update the branch ref on the remote. + - elided. Since git progress display goes to stderr too, the sync push + - is done first, and actually sends the data. Then the direct push is + - tried, with stderr discarded, to update the branch ref on the remote. + - + - The sync push forces the update of the remote synced/git-annex branch. + - This is necessary if a transition has rewritten the git-annex branch. + - Normally any changes to the git-annex branch get pulled and merged before + - this push, so this forcing is unlikely to overwrite new data pushed + - in from another repository that is also syncing. + - + - But overwriting of data on synced/git-annex can happen, in a race. + - The only difference caused by using a forced push in that case is that + - the last repository to push wins the race, rather than the first to push. -} pushBranch :: Remote -> Git.Ref -> Git.Repo -> IO Bool -pushBranch remote branch g = tryIO directpush `after` syncpush +pushBranch remote branch g = tryIO (directpush g) `after` syncpush g where - syncpush = Git.Command.runBool (pushparams (refspec branch)) g - directpush = Git.Command.runQuiet (pushparams (show $ Git.Ref.base branch)) g - pushparams b = + syncpush = Git.Command.runBool $ pushparams + [ Git.Branch.forcePush $ refspec Annex.Branch.name + , refspec branch + ] + directpush = Git.Command.runQuiet $ pushparams + [show $ Git.Ref.base branch] + pushparams branches = [ Param "push" , Param $ Remote.name remote - , Param $ refspec Annex.Branch.name - , Param b - ] + ] ++ map Param branches refspec b = concat [ show $ Git.Ref.base b , ":" diff --git a/Git/Branch.hs b/Git/Branch.hs index d4a684016..fed53d767 100644 --- a/Git/Branch.hs +++ b/Git/Branch.hs @@ -101,3 +101,7 @@ commit message branch parentrefs repo = do return sha where ps = concatMap (\r -> ["-p", show r]) parentrefs + +{- A leading + makes git-push force pushing a branch. -} +forcePush :: String -> String +forcePush b = "+" ++ b diff --git a/debian/changelog b/debian/changelog index 68ba98b8b..cb6fbee5a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,13 @@ +git-annex (4.20130828) UNRELEASED; urgency=low + + * forget: New command, causes git-annex branch history to be forgotten + in a way that will spread to other clones of the repository. + (As long as they're running this version or newer of git-annex.) + * sync, assistant: Force push of the git-annex branch. Necessary + to ensure it gets pushed to remotes after being rewritten by forget. + + -- Joey Hess Tue, 27 Aug 2013 11:03:00 -0400 + git-annex (4.20130827) unstable; urgency=low * Youtube support! (And 53 other video hosts). When quvi is installed, -- cgit v1.2.3 From d3c895b8b328685541cf41e025cff17f94b258b1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 29 Aug 2013 16:41:59 -0400 Subject: wording --- Annex/Branch.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Annex') diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 5af6b6be9..13e1f11eb 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -175,7 +175,7 @@ updateTo pairs = do - (and committed to the branch). - - Updates the branch if necessary, to ensure the most up-to-date available - - content is available. + - content is returned. - - Returns an empty string if the file doesn't exist yet. -} get :: FilePath -> Annex String -- cgit v1.2.3 From 2b83639fac92307deeaa7b1bc75a0c71f35e5b1e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 29 Aug 2013 20:28:45 -0400 Subject: remove print --- Annex/Branch.hs | 1 - 1 file changed, 1 deletion(-) (limited to 'Annex') diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 13e1f11eb..334b60634 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -402,7 +402,6 @@ handleTransitions :: Transitions -> [Git.Ref] -> Annex (Maybe (Git.Branch, [Git. handleTransitions localts refs = do m <- M.fromList <$> mapM getreftransition refs let remotets = M.elems m - liftIO $ print ("transitions", localts, remotets) if all (localts ==) remotets then return Nothing else do -- cgit v1.2.3 From 2afb4de6c02a4515f8b1bf6e24a32cbde7eae5a8 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 31 Aug 2013 17:38:33 -0400 Subject: forget --drop-dead: Completely removes mentions of repositories that have been marked as dead from the git-annex branch. Wrote nice pure transition calculator, and ugly code to stage its results into the git-annex branch. Also had to split up several Log modules that Annex.Branch needed to use, but that themselves used Annex.Branch. The transition calculator is limited to looking at and changing one file at a time. While this made the implementation relatively easy, it precludes transitions that do stuff like deleting old url log files for keys that are being removed because they are no longer present anywhere. --- Annex/Branch.hs | 91 +++++++++++++++++++++++++++++++++++---------- Annex/Branch/Transitions.hs | 53 ++++++++++++++++++++++++++ Command/Forget.hs | 31 ++++++++++----- Logs/Presence.hs | 83 ++--------------------------------------- Logs/Presence/Pure.hs | 84 +++++++++++++++++++++++++++++++++++++++++ Logs/Transitions.hs | 13 ------- Logs/Trust.hs | 28 ++------------ Logs/Trust/Pure.hs | 36 ++++++++++++++++++ debian/changelog | 2 + doc/git-annex.mdwn | 13 +++---- 10 files changed, 279 insertions(+), 155 deletions(-) create mode 100644 Annex/Branch/Transitions.hs create mode 100644 Logs/Presence/Pure.hs create mode 100644 Logs/Trust/Pure.hs (limited to 'Annex') diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 334b60634..9ee281de9 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -46,8 +46,12 @@ import Annex.CatFile import Annex.Perms import qualified Annex import Utility.Env +import Logs import Logs.Transitions +import Logs.Trust.Pure import Annex.ReplaceFile +import qualified Annex.Queue +import Annex.Branch.Transitions {- Name of the branch that is used to store git-annex's information. -} name :: Git.Ref @@ -194,7 +198,10 @@ get' :: FilePath -> Annex String get' file = go =<< getJournalFile file where go (Just journalcontent) = return journalcontent - go Nothing = withIndex $ L.unpack <$> catFile fullname file + go Nothing = getRaw file + +getRaw :: FilePath -> Annex String +getRaw file = withIndex $ L.unpack <$> catFile fullname file {- Applies a function to modifiy the content of a file. - @@ -272,13 +279,17 @@ commitBranch' branchref message parents = do files :: Annex [FilePath] files = do update - withIndex $ do - bfiles <- inRepo $ Git.Command.pipeNullSplitZombie - [ Params "ls-tree --name-only -r -z" - , Param $ show fullname - ] - jfiles <- getJournalledFiles - return $ jfiles ++ bfiles + (++) + <$> branchFiles + <*> getJournalledFiles + +{- Files in the branch, not including any from journalled changes, + - and without updating the branch. -} +branchFiles :: Annex [FilePath] +branchFiles = withIndex $ inRepo $ Git.Command.pipeNullSplitZombie + [ Params "ls-tree --name-only -r -z" + , Param $ show fullname + ] {- Populates the branch's index file with the current branch contents. - @@ -436,20 +447,60 @@ getIgnoredRefs = S.fromList . mapMaybe Git.Sha.extractSha . lines <$> content - commits it to the branch, or creates a new branch, and returns - the branch's ref. -} performTransitions :: Transitions -> Bool -> Annex Git.Ref -performTransitions ts neednewbranch = withIndex $ do - when (inTransitions ForgetDeadRemotes ts) $ - error "TODO ForgetDeadRemotes transition" - if neednewbranch - then do - committedref <- inRepo $ Git.Branch.commit message fullname [] - setIndexSha committedref - return committedref - else do - ref <- getBranch - commitBranch ref message [fullname] - getBranch +performTransitions ts neednewbranch = do + -- For simplicity & speed, we're going to use the Annex.Queue to + -- update the git-annex branch, while it usually holds changes + -- for the head branch. Flush any such changes. + Annex.Queue.flush + withIndex $ do + run $ mapMaybe getTransitionCalculator $ transitionList ts + Annex.Queue.flush + if neednewbranch + then do + committedref <- inRepo $ Git.Branch.commit message fullname [] + setIndexSha committedref + return committedref + else do + ref <- getBranch + commitBranch ref message [fullname] + getBranch where message | neednewbranch = "new branch for transition " ++ tdesc | otherwise = "continuing transition " ++ tdesc tdesc = show $ map describeTransition $ transitionList ts + + {- The changes to make to the branch are calculated and applied to + - the branch directly, rather than going through the journal, + - which would be innefficient. (And the journal is not designed + - to hold changes to every file in the branch at once.) + - + - When a file in the branch is changed by transition code, + - that value is remembered and fed into the code for subsequent + - transitions. + -} + run [] = noop + run changers = do + trustmap <- calcTrustMap <$> getRaw trustLog + fs <- branchFiles + hasher <- inRepo hashObjectStart + forM_ fs $ \f -> do + content <- getRaw f + apply changers hasher f content trustmap + liftIO $ hashObjectStop hasher + apply [] _ _ _ _ = return () + apply (changer:rest) hasher file content trustmap = + case changer file content trustmap of + RemoveFile -> do + Annex.Queue.addUpdateIndex + =<< inRepo (Git.UpdateIndex.unstageFile file) + -- File is deleted; can't run any other + -- transitions on it. + return () + ChangeFile content' -> do + sha <- inRepo $ hashObject BlobObject content' + Annex.Queue.addUpdateIndex $ Git.UpdateIndex.pureStreamer $ + Git.UpdateIndex.updateIndexLine sha FileBlob (asTopFilePath file) + apply rest hasher file content' trustmap + PreserveFile -> + apply rest hasher file content trustmap diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs new file mode 100644 index 000000000..90002de62 --- /dev/null +++ b/Annex/Branch/Transitions.hs @@ -0,0 +1,53 @@ +{- git-annex branch transitions + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Branch.Transitions ( + FileTransition(..), + getTransitionCalculator +) where + +import Logs +import Logs.Transitions +import Logs.UUIDBased as UUIDBased +import Logs.Presence.Pure as Presence +import Types.TrustLevel +import Types.UUID + +import qualified Data.Map as M + +data FileTransition + = ChangeFile String + | RemoveFile + | PreserveFile + +type TransitionCalculator = FilePath -> String -> TrustMap -> FileTransition + +getTransitionCalculator :: Transition -> Maybe TransitionCalculator +getTransitionCalculator ForgetGitHistory = Nothing +getTransitionCalculator ForgetDeadRemotes = Just dropDead + +dropDead :: FilePath -> String -> TrustMap -> FileTransition +dropDead f content trustmap = case getLogVariety f of + Just UUIDBasedLog -> ChangeFile $ + UUIDBased.showLog id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLog Just content + Just (PresenceLog _) -> + let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content + in if null newlog + then RemoveFile + else ChangeFile $ Presence.showLog newlog + Nothing -> PreserveFile + +dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String +dropDeadFromUUIDBasedLog trustmap = M.filterWithKey $ notDead trustmap . const + +{- Presence logs can contain UUIDs or other values. Any line that matches + - a dead uuid is dropped; any other values are passed through. -} +dropDeadFromPresenceLog :: TrustMap -> [Presence.LogLine] -> [Presence.LogLine] +dropDeadFromPresenceLog trustmap = filter $ notDead trustmap (toUUID . Presence.info) + +notDead :: TrustMap -> (v -> UUID) -> v -> Bool +notDead trustmap a v = M.findWithDefault SemiTrusted (a v) trustmap /= DeadTrusted diff --git a/Command/Forget.hs b/Command/Forget.hs index e405a9918..d216ae3ca 100644 --- a/Command/Forget.hs +++ b/Command/Forget.hs @@ -12,30 +12,41 @@ import Command import qualified Annex.Branch as Branch import Logs.Transitions import qualified Annex +import qualified Option import Data.Time.Clock.POSIX def :: [Command] -def = [command "forget" paramNothing seek +def = [withOptions forgetOptions $ command "forget" paramNothing seek SectionMaintenance "prune git-annex branch history"] +forgetOptions :: [Option] +forgetOptions = [dropDeadOption] + +dropDeadOption :: Option +dropDeadOption = Option.flag [] "drop-dead" "drop references to dead repositories" + seek :: [CommandSeek] -seek = [withNothing start] +seek = [withFlag dropDeadOption $ \dropdead -> + withNothing $ start dropdead] -start :: CommandStart -start = do +start :: Bool -> CommandStart +start dropdead = do showStart "forget" "git-annex" - next $ perform =<< Annex.getState Annex.force - -perform :: Bool -> CommandPerform -perform True = do now <- liftIO getPOSIXTime - let ts = addTransition now ForgetGitHistory noTransitions + let basets = addTransition now ForgetGitHistory noTransitions + let ts = if dropdead + then addTransition now ForgetDeadRemotes basets + else basets + next $ perform ts =<< Annex.getState Annex.force + +perform :: Transitions -> Bool -> CommandPerform +perform ts True = do recordTransitions Branch.change ts -- get branch committed before contining with the transition Branch.update void $ Branch.performTransitions ts True next $ return True -perform False = do +perform _ False = do showLongNote "To forget git-annex branch history, you must specify --force. This deletes metadata!" stop diff --git a/Logs/Presence.hs b/Logs/Presence.hs index ec5cec209..516d59618 100644 --- a/Logs/Presence.hs +++ b/Logs/Presence.hs @@ -12,36 +12,18 @@ -} module Logs.Presence ( - LogStatus(..), - LogLine(LogLine), + module X, addLog, readLog, - getLog, - parseLog, - showLog, logNow, - compactLog, - currentLog, - prop_parse_show_log, + currentLog ) where import Data.Time.Clock.POSIX -import Data.Time -import System.Locale -import qualified Data.Map as M +import Logs.Presence.Pure as X import Common.Annex import qualified Annex.Branch -import Utility.QuickCheck - -data LogLine = LogLine { - date :: POSIXTime, - status :: LogStatus, - info :: String -} deriving (Eq, Show) - -data LogStatus = InfoPresent | InfoMissing - deriving (Eq, Show, Bounded, Enum) addLog :: FilePath -> LogLine -> Annex () addLog file line = Annex.Branch.change file $ \s -> @@ -52,29 +34,6 @@ addLog file line = Annex.Branch.change file $ \s -> readLog :: FilePath -> Annex [LogLine] readLog = parseLog <$$> Annex.Branch.get -{- Parses a log file. Unparseable lines are ignored. -} -parseLog :: String -> [LogLine] -parseLog = mapMaybe parseline . lines - where - parseline l = LogLine - <$> (utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" d) - <*> parsestatus s - <*> pure rest - where - (d, pastd) = separate (== ' ') l - (s, rest) = separate (== ' ') pastd - parsestatus "1" = Just InfoPresent - parsestatus "0" = Just InfoMissing - parsestatus _ = Nothing - -{- Generates a log file. -} -showLog :: [LogLine] -> String -showLog = unlines . map genline - where - genline (LogLine d s i) = unwords [show d, genstatus s, i] - genstatus InfoPresent = "1" - genstatus InfoMissing = "0" - {- Generates a new LogLine with the current date. -} logNow :: LogStatus -> String -> Annex LogLine logNow s i = do @@ -84,39 +43,3 @@ logNow s i = do {- Reads a log and returns only the info that is still in effect. -} currentLog :: FilePath -> Annex [String] currentLog file = map info . filterPresent <$> readLog file - -{- Given a log, returns only the info that is are still in effect. -} -getLog :: String -> [String] -getLog = map info . filterPresent . parseLog - -{- Returns the info from LogLines that are in effect. -} -filterPresent :: [LogLine] -> [LogLine] -filterPresent = filter (\l -> InfoPresent == status l) . compactLog - -{- Compacts a set of logs, returning a subset that contains the current - - status. -} -compactLog :: [LogLine] -> [LogLine] -compactLog = M.elems . foldr mapLog M.empty - -type LogMap = M.Map String LogLine - -{- Inserts a log into a map of logs, if the log has better (ie, newer) - - information than the other logs in the map -} -mapLog :: LogLine -> LogMap -> LogMap -mapLog l m - | better = M.insert i l m - | otherwise = m - where - better = maybe True newer $ M.lookup i m - newer l' = date l' <= date l - i = info l - -instance Arbitrary LogLine where - arbitrary = LogLine - <$> arbitrary - <*> elements [minBound..maxBound] - <*> arbitrary `suchThat` ('\n' `notElem`) - -prop_parse_show_log :: [LogLine] -> Bool -prop_parse_show_log l = parseLog (showLog l) == l - diff --git a/Logs/Presence/Pure.hs b/Logs/Presence/Pure.hs new file mode 100644 index 000000000..ffeb78b26 --- /dev/null +++ b/Logs/Presence/Pure.hs @@ -0,0 +1,84 @@ +{- git-annex presence log, pure operations + - + - Copyright 2010-2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Presence.Pure where + +import Data.Time.Clock.POSIX +import Data.Time +import System.Locale +import qualified Data.Map as M + +import Common.Annex +import Utility.QuickCheck + +data LogLine = LogLine { + date :: POSIXTime, + status :: LogStatus, + info :: String +} deriving (Eq, Show) + +data LogStatus = InfoPresent | InfoMissing + deriving (Eq, Show, Bounded, Enum) + +{- Parses a log file. Unparseable lines are ignored. -} +parseLog :: String -> [LogLine] +parseLog = mapMaybe parseline . lines + where + parseline l = LogLine + <$> (utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" d) + <*> parsestatus s + <*> pure rest + where + (d, pastd) = separate (== ' ') l + (s, rest) = separate (== ' ') pastd + parsestatus "1" = Just InfoPresent + parsestatus "0" = Just InfoMissing + parsestatus _ = Nothing + +{- Generates a log file. -} +showLog :: [LogLine] -> String +showLog = unlines . map genline + where + genline (LogLine d s i) = unwords [show d, genstatus s, i] + genstatus InfoPresent = "1" + genstatus InfoMissing = "0" + +{- Given a log, returns only the info that is are still in effect. -} +getLog :: String -> [String] +getLog = map info . filterPresent . parseLog + +{- Returns the info from LogLines that are in effect. -} +filterPresent :: [LogLine] -> [LogLine] +filterPresent = filter (\l -> InfoPresent == status l) . compactLog + +{- Compacts a set of logs, returning a subset that contains the current + - status. -} +compactLog :: [LogLine] -> [LogLine] +compactLog = M.elems . foldr mapLog M.empty + +type LogMap = M.Map String LogLine + +{- Inserts a log into a map of logs, if the log has better (ie, newer) + - information than the other logs in the map -} +mapLog :: LogLine -> LogMap -> LogMap +mapLog l m + | better = M.insert i l m + | otherwise = m + where + better = maybe True newer $ M.lookup i m + newer l' = date l' <= date l + i = info l + +instance Arbitrary LogLine where + arbitrary = LogLine + <$> arbitrary + <*> elements [minBound..maxBound] + <*> arbitrary `suchThat` ('\n' `notElem`) + +prop_parse_show_log :: [LogLine] -> Bool +prop_parse_show_log l = parseLog (showLog l) == l + diff --git a/Logs/Transitions.hs b/Logs/Transitions.hs index d4b7d5eb3..783ce5090 100644 --- a/Logs/Transitions.hs +++ b/Logs/Transitions.hs @@ -7,16 +7,6 @@ - done that is listed in the remote branch by checking that the local - branch contains the same transition, with the same or newer start time. - - - When a remote branch that has had an transition performed on it - - becomes available for merging into the local git-annex branch, - - the transition is first performed on the local branch. - - - - When merging a remote branch into the local git-annex branch, - - all transitions that have been performed on the local branch must also - - have been performed on the remote branch already. (Or it would be - - possible to perform the transitions on a fixup branch and merge it, - - but that would be expensive.) - - - Copyright 2013 Joey Hess - - Licensed under the GNU GPL version 3 or higher. @@ -86,9 +76,6 @@ parseTransitionLine s = TransitionLine <$> pdate ds <*> readish ts combineTransitions :: [Transitions] -> Transitions combineTransitions = S.unions -inTransitions :: Transition -> Transitions -> Bool -inTransitions t = not . S.null . S.filter (\l -> transition l == t) - transitionList :: Transitions -> [Transition] transitionList = map transition . S.elems diff --git a/Logs/Trust.hs b/Logs/Trust.hs index 6c6b33f70..c6f0ad3ab 100644 --- a/Logs/Trust.hs +++ b/Logs/Trust.hs @@ -6,6 +6,7 @@ -} module Logs.Trust ( + module X, trustLog, TrustLevel(..), trustGet, @@ -16,8 +17,6 @@ module Logs.Trust ( lookupTrust, trustMapLoad, trustMapRaw, - - prop_parse_show_TrustLog, ) where import qualified Data.Map as M @@ -31,6 +30,7 @@ import Logs import Logs.UUIDBased import Remote.List import qualified Types.Remote +import Logs.Trust.Pure as X {- Returns a list of UUIDs that the trustLog indicates have the - specified trust level. @@ -94,26 +94,4 @@ trustMapLoad = do {- Does not include forcetrust or git config values, just those from the - log file. -} trustMapRaw :: Annex TrustMap -trustMapRaw = simpleMap . parseLog (Just . parseTrustLog) - <$> Annex.Branch.get trustLog - -{- The trust.log used to only list trusted repos, without a field for the - - trust status, which is why this defaults to Trusted. -} -parseTrustLog :: String -> TrustLevel -parseTrustLog s = maybe Trusted parse $ headMaybe $ words s - where - parse "1" = Trusted - parse "0" = UnTrusted - parse "X" = DeadTrusted - parse _ = SemiTrusted - -showTrustLog :: TrustLevel -> String -showTrustLog Trusted = "1" -showTrustLog UnTrusted = "0" -showTrustLog DeadTrusted = "X" -showTrustLog SemiTrusted = "?" - -prop_parse_show_TrustLog :: Bool -prop_parse_show_TrustLog = all check [minBound .. maxBound] - where - check l = parseTrustLog (showTrustLog l) == l +trustMapRaw = calcTrustMap <$> Annex.Branch.get trustLog diff --git a/Logs/Trust/Pure.hs b/Logs/Trust/Pure.hs new file mode 100644 index 000000000..11cfbe056 --- /dev/null +++ b/Logs/Trust/Pure.hs @@ -0,0 +1,36 @@ +{- git-annex trust log, pure operations + - + - Copyright 2010-2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Trust.Pure where + +import Common.Annex +import Types.TrustLevel +import Logs.UUIDBased + +calcTrustMap :: String -> TrustMap +calcTrustMap = simpleMap . parseLog (Just . parseTrustLog) + +{- The trust.log used to only list trusted repos, without a field for the + - trust status, which is why this defaults to Trusted. -} +parseTrustLog :: String -> TrustLevel +parseTrustLog s = maybe Trusted parse $ headMaybe $ words s + where + parse "1" = Trusted + parse "0" = UnTrusted + parse "X" = DeadTrusted + parse _ = SemiTrusted + +showTrustLog :: TrustLevel -> String +showTrustLog Trusted = "1" +showTrustLog UnTrusted = "0" +showTrustLog DeadTrusted = "X" +showTrustLog SemiTrusted = "?" + +prop_parse_show_TrustLog :: Bool +prop_parse_show_TrustLog = all check [minBound .. maxBound] + where + check l = parseTrustLog (showTrustLog l) == l diff --git a/debian/changelog b/debian/changelog index cb6fbee5a..a28c9f187 100644 --- a/debian/changelog +++ b/debian/changelog @@ -3,6 +3,8 @@ git-annex (4.20130828) UNRELEASED; urgency=low * forget: New command, causes git-annex branch history to be forgotten in a way that will spread to other clones of the repository. (As long as they're running this version or newer of git-annex.) + * forget --drop-dead: Completely removes mentions of repositories that + have been marked as dead from the git-annex branch. * sync, assistant: Force push of the git-annex branch. Necessary to ensure it gets pushed to remotes after being rewritten by forget. diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 5fb0ce5a4..269588add 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -482,17 +482,16 @@ subdirectories). * forget Causes the git-annex branch to be rewritten, throwing away historical - data about past locations of files, files that are no longer present on - any remote, etc. The resulting branch will use less space, but for - example `git annex log` will not be able to show where files used to - be located. + data about past locations of files. The resulting branch will use less + space, but `git annex log` will not be able to show where + files used to be located. - To also prune references to remotes that have been marked as dead, - specify --dead. + To also prune references to repositories that have been marked as dead, + specify --drop-dead. When this rewritten branch is merged into other clones of the repository, git-annex will automatically perform the same rewriting - to their local git-annex branch. So the forgetfulness will automatically + to their local git-annex branches. So the forgetfulness will automatically propigate out from its starting point until all repositories running git-annex have forgotten their old history. (You may need to force git to push the branch to any git repositories not running git-annex. -- cgit v1.2.3