summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Annex/Branch.hs91
-rw-r--r--Annex/Branch/Transitions.hs53
-rw-r--r--Command/Forget.hs31
-rw-r--r--Logs/Presence.hs83
-rw-r--r--Logs/Presence/Pure.hs84
-rw-r--r--Logs/Transitions.hs13
-rw-r--r--Logs/Trust.hs28
-rw-r--r--Logs/Trust/Pure.hs36
-rw-r--r--debian/changelog2
-rw-r--r--doc/git-annex.mdwn13
10 files changed, 279 insertions, 155 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index 334b60634..9ee281de9 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -46,8 +46,12 @@ import Annex.CatFile
import Annex.Perms
import qualified Annex
import Utility.Env
+import Logs
import Logs.Transitions
+import Logs.Trust.Pure
import Annex.ReplaceFile
+import qualified Annex.Queue
+import Annex.Branch.Transitions
{- Name of the branch that is used to store git-annex's information. -}
name :: Git.Ref
@@ -194,7 +198,10 @@ get' :: FilePath -> Annex String
get' file = go =<< getJournalFile file
where
go (Just journalcontent) = return journalcontent
- go Nothing = withIndex $ L.unpack <$> catFile fullname file
+ go Nothing = getRaw file
+
+getRaw :: FilePath -> Annex String
+getRaw file = withIndex $ L.unpack <$> catFile fullname file
{- Applies a function to modifiy the content of a file.
-
@@ -272,13 +279,17 @@ commitBranch' branchref message parents = do
files :: Annex [FilePath]
files = do
update
- withIndex $ do
- bfiles <- inRepo $ Git.Command.pipeNullSplitZombie
- [ Params "ls-tree --name-only -r -z"
- , Param $ show fullname
- ]
- jfiles <- getJournalledFiles
- return $ jfiles ++ bfiles
+ (++)
+ <$> branchFiles
+ <*> getJournalledFiles
+
+{- Files in the branch, not including any from journalled changes,
+ - and without updating the branch. -}
+branchFiles :: Annex [FilePath]
+branchFiles = withIndex $ inRepo $ Git.Command.pipeNullSplitZombie
+ [ Params "ls-tree --name-only -r -z"
+ , Param $ show fullname
+ ]
{- Populates the branch's index file with the current branch contents.
-
@@ -436,20 +447,60 @@ getIgnoredRefs = S.fromList . mapMaybe Git.Sha.extractSha . lines <$> content
- commits it to the branch, or creates a new branch, and returns
- the branch's ref. -}
performTransitions :: Transitions -> Bool -> Annex Git.Ref
-performTransitions ts neednewbranch = withIndex $ do
- when (inTransitions ForgetDeadRemotes ts) $
- error "TODO ForgetDeadRemotes transition"
- if neednewbranch
- then do
- committedref <- inRepo $ Git.Branch.commit message fullname []
- setIndexSha committedref
- return committedref
- else do
- ref <- getBranch
- commitBranch ref message [fullname]
- getBranch
+performTransitions ts neednewbranch = do
+ -- For simplicity & speed, we're going to use the Annex.Queue to
+ -- update the git-annex branch, while it usually holds changes
+ -- for the head branch. Flush any such changes.
+ Annex.Queue.flush
+ withIndex $ do
+ run $ mapMaybe getTransitionCalculator $ transitionList ts
+ Annex.Queue.flush
+ if neednewbranch
+ then do
+ committedref <- inRepo $ Git.Branch.commit message fullname []
+ setIndexSha committedref
+ return committedref
+ else do
+ ref <- getBranch
+ commitBranch ref message [fullname]
+ getBranch
where
message
| neednewbranch = "new branch for transition " ++ tdesc
| otherwise = "continuing transition " ++ tdesc
tdesc = show $ map describeTransition $ transitionList ts
+
+ {- The changes to make to the branch are calculated and applied to
+ - the branch directly, rather than going through the journal,
+ - which would be innefficient. (And the journal is not designed
+ - to hold changes to every file in the branch at once.)
+ -
+ - When a file in the branch is changed by transition code,
+ - that value is remembered and fed into the code for subsequent
+ - transitions.
+ -}
+ run [] = noop
+ run changers = do
+ trustmap <- calcTrustMap <$> getRaw trustLog
+ fs <- branchFiles
+ hasher <- inRepo hashObjectStart
+ forM_ fs $ \f -> do
+ content <- getRaw f
+ apply changers hasher f content trustmap
+ liftIO $ hashObjectStop hasher
+ apply [] _ _ _ _ = return ()
+ apply (changer:rest) hasher file content trustmap =
+ case changer file content trustmap of
+ RemoveFile -> do
+ Annex.Queue.addUpdateIndex
+ =<< inRepo (Git.UpdateIndex.unstageFile file)
+ -- File is deleted; can't run any other
+ -- transitions on it.
+ return ()
+ ChangeFile content' -> do
+ sha <- inRepo $ hashObject BlobObject content'
+ Annex.Queue.addUpdateIndex $ Git.UpdateIndex.pureStreamer $
+ Git.UpdateIndex.updateIndexLine sha FileBlob (asTopFilePath file)
+ apply rest hasher file content' trustmap
+ PreserveFile ->
+ apply rest hasher file content trustmap
diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs
new file mode 100644
index 000000000..90002de62
--- /dev/null
+++ b/Annex/Branch/Transitions.hs
@@ -0,0 +1,53 @@
+{- git-annex branch transitions
+ -
+ - Copyright 2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.Branch.Transitions (
+ FileTransition(..),
+ getTransitionCalculator
+) where
+
+import Logs
+import Logs.Transitions
+import Logs.UUIDBased as UUIDBased
+import Logs.Presence.Pure as Presence
+import Types.TrustLevel
+import Types.UUID
+
+import qualified Data.Map as M
+
+data FileTransition
+ = ChangeFile String
+ | RemoveFile
+ | PreserveFile
+
+type TransitionCalculator = FilePath -> String -> TrustMap -> FileTransition
+
+getTransitionCalculator :: Transition -> Maybe TransitionCalculator
+getTransitionCalculator ForgetGitHistory = Nothing
+getTransitionCalculator ForgetDeadRemotes = Just dropDead
+
+dropDead :: FilePath -> String -> TrustMap -> FileTransition
+dropDead f content trustmap = case getLogVariety f of
+ Just UUIDBasedLog -> ChangeFile $
+ UUIDBased.showLog id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLog Just content
+ Just (PresenceLog _) ->
+ let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content
+ in if null newlog
+ then RemoveFile
+ else ChangeFile $ Presence.showLog newlog
+ Nothing -> PreserveFile
+
+dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String
+dropDeadFromUUIDBasedLog trustmap = M.filterWithKey $ notDead trustmap . const
+
+{- Presence logs can contain UUIDs or other values. Any line that matches
+ - a dead uuid is dropped; any other values are passed through. -}
+dropDeadFromPresenceLog :: TrustMap -> [Presence.LogLine] -> [Presence.LogLine]
+dropDeadFromPresenceLog trustmap = filter $ notDead trustmap (toUUID . Presence.info)
+
+notDead :: TrustMap -> (v -> UUID) -> v -> Bool
+notDead trustmap a v = M.findWithDefault SemiTrusted (a v) trustmap /= DeadTrusted
diff --git a/Command/Forget.hs b/Command/Forget.hs
index e405a9918..d216ae3ca 100644
--- a/Command/Forget.hs
+++ b/Command/Forget.hs
@@ -12,30 +12,41 @@ import Command
import qualified Annex.Branch as Branch
import Logs.Transitions
import qualified Annex
+import qualified Option
import Data.Time.Clock.POSIX
def :: [Command]
-def = [command "forget" paramNothing seek
+def = [withOptions forgetOptions $ command "forget" paramNothing seek
SectionMaintenance "prune git-annex branch history"]
+forgetOptions :: [Option]
+forgetOptions = [dropDeadOption]
+
+dropDeadOption :: Option
+dropDeadOption = Option.flag [] "drop-dead" "drop references to dead repositories"
+
seek :: [CommandSeek]
-seek = [withNothing start]
+seek = [withFlag dropDeadOption $ \dropdead ->
+ withNothing $ start dropdead]
-start :: CommandStart
-start = do
+start :: Bool -> CommandStart
+start dropdead = do
showStart "forget" "git-annex"
- next $ perform =<< Annex.getState Annex.force
-
-perform :: Bool -> CommandPerform
-perform True = do
now <- liftIO getPOSIXTime
- let ts = addTransition now ForgetGitHistory noTransitions
+ let basets = addTransition now ForgetGitHistory noTransitions
+ let ts = if dropdead
+ then addTransition now ForgetDeadRemotes basets
+ else basets
+ next $ perform ts =<< Annex.getState Annex.force
+
+perform :: Transitions -> Bool -> CommandPerform
+perform ts True = do
recordTransitions Branch.change ts
-- get branch committed before contining with the transition
Branch.update
void $ Branch.performTransitions ts True
next $ return True
-perform False = do
+perform _ False = do
showLongNote "To forget git-annex branch history, you must specify --force. This deletes metadata!"
stop
diff --git a/Logs/Presence.hs b/Logs/Presence.hs
index ec5cec209..516d59618 100644
--- a/Logs/Presence.hs
+++ b/Logs/Presence.hs
@@ -12,36 +12,18 @@
-}
module Logs.Presence (
- LogStatus(..),
- LogLine(LogLine),
+ module X,
addLog,
readLog,
- getLog,
- parseLog,
- showLog,
logNow,
- compactLog,
- currentLog,
- prop_parse_show_log,
+ currentLog
) where
import Data.Time.Clock.POSIX
-import Data.Time
-import System.Locale
-import qualified Data.Map as M
+import Logs.Presence.Pure as X
import Common.Annex
import qualified Annex.Branch
-import Utility.QuickCheck
-
-data LogLine = LogLine {
- date :: POSIXTime,
- status :: LogStatus,
- info :: String
-} deriving (Eq, Show)
-
-data LogStatus = InfoPresent | InfoMissing
- deriving (Eq, Show, Bounded, Enum)
addLog :: FilePath -> LogLine -> Annex ()
addLog file line = Annex.Branch.change file $ \s ->
@@ -52,29 +34,6 @@ addLog file line = Annex.Branch.change file $ \s ->
readLog :: FilePath -> Annex [LogLine]
readLog = parseLog <$$> Annex.Branch.get
-{- Parses a log file. Unparseable lines are ignored. -}
-parseLog :: String -> [LogLine]
-parseLog = mapMaybe parseline . lines
- where
- parseline l = LogLine
- <$> (utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" d)
- <*> parsestatus s
- <*> pure rest
- where
- (d, pastd) = separate (== ' ') l
- (s, rest) = separate (== ' ') pastd
- parsestatus "1" = Just InfoPresent
- parsestatus "0" = Just InfoMissing
- parsestatus _ = Nothing
-
-{- Generates a log file. -}
-showLog :: [LogLine] -> String
-showLog = unlines . map genline
- where
- genline (LogLine d s i) = unwords [show d, genstatus s, i]
- genstatus InfoPresent = "1"
- genstatus InfoMissing = "0"
-
{- Generates a new LogLine with the current date. -}
logNow :: LogStatus -> String -> Annex LogLine
logNow s i = do
@@ -84,39 +43,3 @@ logNow s i = do
{- Reads a log and returns only the info that is still in effect. -}
currentLog :: FilePath -> Annex [String]
currentLog file = map info . filterPresent <$> readLog file
-
-{- Given a log, returns only the info that is are still in effect. -}
-getLog :: String -> [String]
-getLog = map info . filterPresent . parseLog
-
-{- Returns the info from LogLines that are in effect. -}
-filterPresent :: [LogLine] -> [LogLine]
-filterPresent = filter (\l -> InfoPresent == status l) . compactLog
-
-{- Compacts a set of logs, returning a subset that contains the current
- - status. -}
-compactLog :: [LogLine] -> [LogLine]
-compactLog = M.elems . foldr mapLog M.empty
-
-type LogMap = M.Map String LogLine
-
-{- Inserts a log into a map of logs, if the log has better (ie, newer)
- - information than the other logs in the map -}
-mapLog :: LogLine -> LogMap -> LogMap
-mapLog l m
- | better = M.insert i l m
- | otherwise = m
- where
- better = maybe True newer $ M.lookup i m
- newer l' = date l' <= date l
- i = info l
-
-instance Arbitrary LogLine where
- arbitrary = LogLine
- <$> arbitrary
- <*> elements [minBound..maxBound]
- <*> arbitrary `suchThat` ('\n' `notElem`)
-
-prop_parse_show_log :: [LogLine] -> Bool
-prop_parse_show_log l = parseLog (showLog l) == l
-
diff --git a/Logs/Presence/Pure.hs b/Logs/Presence/Pure.hs
new file mode 100644
index 000000000..ffeb78b26
--- /dev/null
+++ b/Logs/Presence/Pure.hs
@@ -0,0 +1,84 @@
+{- git-annex presence log, pure operations
+ -
+ - Copyright 2010-2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Logs.Presence.Pure where
+
+import Data.Time.Clock.POSIX
+import Data.Time
+import System.Locale
+import qualified Data.Map as M
+
+import Common.Annex
+import Utility.QuickCheck
+
+data LogLine = LogLine {
+ date :: POSIXTime,
+ status :: LogStatus,
+ info :: String
+} deriving (Eq, Show)
+
+data LogStatus = InfoPresent | InfoMissing
+ deriving (Eq, Show, Bounded, Enum)
+
+{- Parses a log file. Unparseable lines are ignored. -}
+parseLog :: String -> [LogLine]
+parseLog = mapMaybe parseline . lines
+ where
+ parseline l = LogLine
+ <$> (utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" d)
+ <*> parsestatus s
+ <*> pure rest
+ where
+ (d, pastd) = separate (== ' ') l
+ (s, rest) = separate (== ' ') pastd
+ parsestatus "1" = Just InfoPresent
+ parsestatus "0" = Just InfoMissing
+ parsestatus _ = Nothing
+
+{- Generates a log file. -}
+showLog :: [LogLine] -> String
+showLog = unlines . map genline
+ where
+ genline (LogLine d s i) = unwords [show d, genstatus s, i]
+ genstatus InfoPresent = "1"
+ genstatus InfoMissing = "0"
+
+{- Given a log, returns only the info that is are still in effect. -}
+getLog :: String -> [String]
+getLog = map info . filterPresent . parseLog
+
+{- Returns the info from LogLines that are in effect. -}
+filterPresent :: [LogLine] -> [LogLine]
+filterPresent = filter (\l -> InfoPresent == status l) . compactLog
+
+{- Compacts a set of logs, returning a subset that contains the current
+ - status. -}
+compactLog :: [LogLine] -> [LogLine]
+compactLog = M.elems . foldr mapLog M.empty
+
+type LogMap = M.Map String LogLine
+
+{- Inserts a log into a map of logs, if the log has better (ie, newer)
+ - information than the other logs in the map -}
+mapLog :: LogLine -> LogMap -> LogMap
+mapLog l m
+ | better = M.insert i l m
+ | otherwise = m
+ where
+ better = maybe True newer $ M.lookup i m
+ newer l' = date l' <= date l
+ i = info l
+
+instance Arbitrary LogLine where
+ arbitrary = LogLine
+ <$> arbitrary
+ <*> elements [minBound..maxBound]
+ <*> arbitrary `suchThat` ('\n' `notElem`)
+
+prop_parse_show_log :: [LogLine] -> Bool
+prop_parse_show_log l = parseLog (showLog l) == l
+
diff --git a/Logs/Transitions.hs b/Logs/Transitions.hs
index d4b7d5eb3..783ce5090 100644
--- a/Logs/Transitions.hs
+++ b/Logs/Transitions.hs
@@ -7,16 +7,6 @@
- done that is listed in the remote branch by checking that the local
- branch contains the same transition, with the same or newer start time.
-
- - When a remote branch that has had an transition performed on it
- - becomes available for merging into the local git-annex branch,
- - the transition is first performed on the local branch.
- -
- - When merging a remote branch into the local git-annex branch,
- - all transitions that have been performed on the local branch must also
- - have been performed on the remote branch already. (Or it would be
- - possible to perform the transitions on a fixup branch and merge it,
- - but that would be expensive.)
- -
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
@@ -86,9 +76,6 @@ parseTransitionLine s = TransitionLine <$> pdate ds <*> readish ts
combineTransitions :: [Transitions] -> Transitions
combineTransitions = S.unions
-inTransitions :: Transition -> Transitions -> Bool
-inTransitions t = not . S.null . S.filter (\l -> transition l == t)
-
transitionList :: Transitions -> [Transition]
transitionList = map transition . S.elems
diff --git a/Logs/Trust.hs b/Logs/Trust.hs
index 6c6b33f70..c6f0ad3ab 100644
--- a/Logs/Trust.hs
+++ b/Logs/Trust.hs
@@ -6,6 +6,7 @@
-}
module Logs.Trust (
+ module X,
trustLog,
TrustLevel(..),
trustGet,
@@ -16,8 +17,6 @@ module Logs.Trust (
lookupTrust,
trustMapLoad,
trustMapRaw,
-
- prop_parse_show_TrustLog,
) where
import qualified Data.Map as M
@@ -31,6 +30,7 @@ import Logs
import Logs.UUIDBased
import Remote.List
import qualified Types.Remote
+import Logs.Trust.Pure as X
{- Returns a list of UUIDs that the trustLog indicates have the
- specified trust level.
@@ -94,26 +94,4 @@ trustMapLoad = do
{- Does not include forcetrust or git config values, just those from the
- log file. -}
trustMapRaw :: Annex TrustMap
-trustMapRaw = simpleMap . parseLog (Just . parseTrustLog)
- <$> Annex.Branch.get trustLog
-
-{- The trust.log used to only list trusted repos, without a field for the
- - trust status, which is why this defaults to Trusted. -}
-parseTrustLog :: String -> TrustLevel
-parseTrustLog s = maybe Trusted parse $ headMaybe $ words s
- where
- parse "1" = Trusted
- parse "0" = UnTrusted
- parse "X" = DeadTrusted
- parse _ = SemiTrusted
-
-showTrustLog :: TrustLevel -> String
-showTrustLog Trusted = "1"
-showTrustLog UnTrusted = "0"
-showTrustLog DeadTrusted = "X"
-showTrustLog SemiTrusted = "?"
-
-prop_parse_show_TrustLog :: Bool
-prop_parse_show_TrustLog = all check [minBound .. maxBound]
- where
- check l = parseTrustLog (showTrustLog l) == l
+trustMapRaw = calcTrustMap <$> Annex.Branch.get trustLog
diff --git a/Logs/Trust/Pure.hs b/Logs/Trust/Pure.hs
new file mode 100644
index 000000000..11cfbe056
--- /dev/null
+++ b/Logs/Trust/Pure.hs
@@ -0,0 +1,36 @@
+{- git-annex trust log, pure operations
+ -
+ - Copyright 2010-2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Logs.Trust.Pure where
+
+import Common.Annex
+import Types.TrustLevel
+import Logs.UUIDBased
+
+calcTrustMap :: String -> TrustMap
+calcTrustMap = simpleMap . parseLog (Just . parseTrustLog)
+
+{- The trust.log used to only list trusted repos, without a field for the
+ - trust status, which is why this defaults to Trusted. -}
+parseTrustLog :: String -> TrustLevel
+parseTrustLog s = maybe Trusted parse $ headMaybe $ words s
+ where
+ parse "1" = Trusted
+ parse "0" = UnTrusted
+ parse "X" = DeadTrusted
+ parse _ = SemiTrusted
+
+showTrustLog :: TrustLevel -> String
+showTrustLog Trusted = "1"
+showTrustLog UnTrusted = "0"
+showTrustLog DeadTrusted = "X"
+showTrustLog SemiTrusted = "?"
+
+prop_parse_show_TrustLog :: Bool
+prop_parse_show_TrustLog = all check [minBound .. maxBound]
+ where
+ check l = parseTrustLog (showTrustLog l) == l
diff --git a/debian/changelog b/debian/changelog
index cb6fbee5a..a28c9f187 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,8 @@ git-annex (4.20130828) UNRELEASED; urgency=low
* forget: New command, causes git-annex branch history to be forgotten
in a way that will spread to other clones of the repository.
(As long as they're running this version or newer of git-annex.)
+ * forget --drop-dead: Completely removes mentions of repositories that
+ have been marked as dead from the git-annex branch.
* sync, assistant: Force push of the git-annex branch. Necessary
to ensure it gets pushed to remotes after being rewritten by forget.
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 5fb0ce5a4..269588add 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -482,17 +482,16 @@ subdirectories).
* forget
Causes the git-annex branch to be rewritten, throwing away historical
- data about past locations of files, files that are no longer present on
- any remote, etc. The resulting branch will use less space, but for
- example `git annex log` will not be able to show where files used to
- be located.
+ data about past locations of files. The resulting branch will use less
+ space, but `git annex log` will not be able to show where
+ files used to be located.
- To also prune references to remotes that have been marked as dead,
- specify --dead.
+ To also prune references to repositories that have been marked as dead,
+ specify --drop-dead.
When this rewritten branch is merged into other clones of
the repository, git-annex will automatically perform the same rewriting
- to their local git-annex branch. So the forgetfulness will automatically
+ to their local git-annex branches. So the forgetfulness will automatically
propigate out from its starting point until all repositories running
git-annex have forgotten their old history. (You may need to force
git to push the branch to any git repositories not running git-annex.