diff options
author | Joey Hess <joeyh@joeyh.name> | 2017-08-14 13:55:38 -0400 |
---|---|---|
committer | Joey Hess <joeyh@joeyh.name> | 2017-08-14 14:19:58 -0400 |
commit | aecfea27593bc121273fe53a6c11d4a22567004f (patch) | |
tree | 25d0b10649fe8da0d5bf5eeeacb9519690035192 | |
parent | 0a16e9c3940d075e2cf88df75beeb0e1aaba4f01 (diff) |
Added GIT_ANNEX_VECTOR_CLOCK environment variable
Can be used to override the default timestamps used in log files in the
git-annex branch. This is a dangerous environment variable; use with
caution.
Note that this only affects writing to the logs on the git-annex branch.
It is not used for metadata in git commits (other env vars can be set for
that).
There are many other places where timestamps are still used, that don't
get committed to git, but do touch disk. Including regular timestamps
of files, and timestamps embedded in some files in .git/annex/, including
the last fsck timestamp and timestamps in transfer log files.
A good way to find such things in git-annex is to get for getPOSIXTime and
getCurrentTime, although some of the results are of course false positives
that never hit disk (unless git-annex gets swapped out..)
So this commit does NOT necessarily make git-annex comply with some HIPPA
privacy regulations; it's up to the user to determine if they can use it in
a way compliant with such regulations.
Benchmarking: It takes 0.00114 milliseconds to call getEnv
"GIT_ANNEX_VECTOR_CLOCK" when that env var is not set. So, 100 thousand log
files can be written with an added overhead of only 0.114 seconds. That
should be by far swamped by the actual overhead of writing the log files
and making the commit containing them.
This commit was supported by the NSF-funded DataLad project.
-rw-r--r-- | Annex/VectorClock.hs | 39 | ||||
-rw-r--r-- | CHANGELOG | 3 | ||||
-rw-r--r-- | Command/Expire.hs | 9 | ||||
-rw-r--r-- | Command/Forget.hs | 9 | ||||
-rw-r--r-- | Command/MetaData.hs | 28 | ||||
-rw-r--r-- | Logs/Activity.hs | 6 | ||||
-rw-r--r-- | Logs/Chunk.hs | 5 | ||||
-rw-r--r-- | Logs/Config.hs | 5 | ||||
-rw-r--r-- | Logs/Difference.hs | 5 | ||||
-rw-r--r-- | Logs/Group.hs | 5 | ||||
-rw-r--r-- | Logs/Location.hs | 6 | ||||
-rw-r--r-- | Logs/MapLog.hs | 39 | ||||
-rw-r--r-- | Logs/MetaData.hs | 17 | ||||
-rw-r--r-- | Logs/Multicast.hs | 6 | ||||
-rw-r--r-- | Logs/PreferredContent/Raw.hs | 13 | ||||
-rw-r--r-- | Logs/Presence.hs | 9 | ||||
-rw-r--r-- | Logs/Presence/Pure.hs | 18 | ||||
-rw-r--r-- | Logs/Remote.hs | 13 | ||||
-rw-r--r-- | Logs/RemoteState.hs | 5 | ||||
-rw-r--r-- | Logs/Schedule.hs | 5 | ||||
-rw-r--r-- | Logs/SingleValue.hs | 12 | ||||
-rw-r--r-- | Logs/Transitions.hs | 14 | ||||
-rw-r--r-- | Logs/Trust/Basic.hs | 6 | ||||
-rw-r--r-- | Logs/UUID.hs | 16 | ||||
-rw-r--r-- | Logs/UUIDBased.hs | 13 | ||||
-rw-r--r-- | Test.hs | 3 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 13 | ||||
-rw-r--r-- | git-annex.cabal | 1 |
28 files changed, 185 insertions, 138 deletions
diff --git a/Annex/VectorClock.hs b/Annex/VectorClock.hs new file mode 100644 index 000000000..24a6bd1fd --- /dev/null +++ b/Annex/VectorClock.hs @@ -0,0 +1,39 @@ +{- git-annex vector clocks + - + - We don't have a way yet to keep true distributed vector clocks. + - The next best thing is a timestamp. + - + - Copyright 2017 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.VectorClock where + +import Data.Time.Clock.POSIX +import Control.Applicative +import Prelude + +import Utility.Env +import Logs.TimeStamp +import Utility.QuickCheck + +-- | Some very old logs did not have any time stamp at all; +-- Unknown is used for those. +data VectorClock = Unknown | VectorClock POSIXTime + deriving (Eq, Ord, Show) + +-- Unknown is oldest. +prop_VectorClock_sane :: Bool +prop_VectorClock_sane = Unknown < VectorClock 1 + +instance Arbitrary VectorClock where + arbitrary = VectorClock <$> arbitrary + +currentVectorClock :: IO VectorClock +currentVectorClock = go =<< getEnv "GIT_ANNEX_VECTOR_CLOCK" + where + go Nothing = VectorClock <$> getPOSIXTime + go (Just s) = case parsePOSIXTime s of + Just t -> return (VectorClock t) + Nothing -> VectorClock <$> getPOSIXTime @@ -2,6 +2,9 @@ git-annex (6.20170521) UNRELEASED; urgency=medium * Fix build with QuickCheck 2.10. * fsck: Support --json. + * Added GIT_ANNEX_VECTOR_CLOCK environment variable, which can be used to + override the default timestamps used in log files in the git-annex + branch. This is a dangerous environment variable; use with caution. -- Joey Hess <id@joeyh.name> Sat, 17 Jun 2017 13:02:24 -0400 diff --git a/Command/Expire.hs b/Command/Expire.hs index 8dd0e962e..551742304 100644 --- a/Command/Expire.hs +++ b/Command/Expire.hs @@ -13,6 +13,7 @@ import Logs.UUID import Logs.MapLog import Logs.Trust import Annex.UUID +import Annex.VectorClock import qualified Remote import Utility.HumanTime @@ -70,15 +71,15 @@ start (Expire expire) noact actlog descs u = where lastact = changed <$> M.lookup u actlog whenactive = case lastact of - Just (Date t) -> do - d <- liftIO $ durationSince $ posixSecondsToUTCTime t + Just (VectorClock c) -> do + d <- liftIO $ durationSince $ posixSecondsToUTCTime c return $ "last active: " ++ fromDuration d ++ " ago" _ -> return "no activity" desc = fromUUID u ++ " " ++ fromMaybe "" (M.lookup u descs) notexpired ent = case ent of Unknown -> False - Date t -> case lookupexpire of - Just (Just expiretime) -> t >= expiretime + VectorClock c -> case lookupexpire of + Just (Just expiretime) -> c >= expiretime _ -> True lookupexpire = headMaybe $ catMaybes $ map (`M.lookup` expire) [Just u, Nothing] diff --git a/Command/Forget.hs b/Command/Forget.hs index 583eee7ca..d172cc693 100644 --- a/Command/Forget.hs +++ b/Command/Forget.hs @@ -11,8 +11,7 @@ import Command import qualified Annex.Branch as Branch import Logs.Transitions import qualified Annex - -import Data.Time.Clock.POSIX +import Annex.VectorClock cmd :: Command cmd = command "forget" SectionMaintenance @@ -36,10 +35,10 @@ seek = commandAction . start start :: ForgetOptions -> CommandStart start o = do showStart "forget" "git-annex" - now <- liftIO getPOSIXTime - let basets = addTransition now ForgetGitHistory noTransitions + c <- liftIO currentVectorClock + let basets = addTransition c ForgetGitHistory noTransitions let ts = if dropDead o - then addTransition now ForgetDeadRemotes basets + then addTransition c ForgetDeadRemotes basets else basets next $ perform ts =<< Annex.getState Annex.force diff --git a/Command/MetaData.hs b/Command/MetaData.hs index 617b291a1..d10fc9921 100644 --- a/Command/MetaData.hs +++ b/Command/MetaData.hs @@ -9,6 +9,7 @@ module Command.MetaData where import Command import Annex.MetaData +import Annex.VectorClock import Logs.MetaData import Annex.WorkTree import Messages.JSON (JSONActionItem(..)) @@ -18,7 +19,6 @@ import qualified Data.Set as S import qualified Data.Map as M import qualified Data.Text as T import qualified Data.ByteString.Lazy.UTF8 as BU -import Data.Time.Clock.POSIX import Data.Aeson import Control.Concurrent @@ -68,28 +68,28 @@ optParser desc = MetaDataOptions seek :: MetaDataOptions -> CommandSeek seek o = case batchOption o of NoBatch -> do - now <- liftIO getPOSIXTime + c <- liftIO currentVectorClock let seeker = case getSet o of Get _ -> withFilesInGit GetAll -> withFilesInGit Set _ -> withFilesInGitNonRecursive "Not recursively setting metadata. Use --force to do that." withKeyOptions (keyOptions o) False - (startKeys now o) - (seeker $ whenAnnexed $ start now o) + (startKeys c o) + (seeker $ whenAnnexed $ start c o) (forFiles o) Batch -> withMessageState $ \s -> case outputType s of JSONOutput _ -> batchInput parseJSONInput $ commandAction . startBatch _ -> giveup "--batch is currently only supported in --json mode" -start :: POSIXTime -> MetaDataOptions -> FilePath -> Key -> CommandStart -start now o file k = startKeys now o k (mkActionItem afile) +start :: VectorClock -> MetaDataOptions -> FilePath -> Key -> CommandStart +start c o file k = startKeys c o k (mkActionItem afile) where afile = AssociatedFile (Just file) -startKeys :: POSIXTime -> MetaDataOptions -> Key -> ActionItem -> CommandStart -startKeys now o k ai = case getSet o of +startKeys :: VectorClock -> MetaDataOptions -> Key -> ActionItem -> CommandStart +startKeys c o k ai = case getSet o of Get f -> do l <- S.toList . currentMetaDataValues f <$> getCurrentMetaData k liftIO $ forM_ l $ @@ -97,14 +97,14 @@ startKeys now o k ai = case getSet o of stop _ -> do showStart' "metadata" k ai - next $ perform now o k + next $ perform c o k -perform :: POSIXTime -> MetaDataOptions -> Key -> CommandPerform -perform now o k = case getSet o of +perform :: VectorClock -> MetaDataOptions -> Key -> CommandPerform +perform c o k = case getSet o of Set ms -> do oldm <- getCurrentMetaData k let m = combineMetaData $ map (modMeta oldm) ms - addMetaData' k m now + addMetaData' k m c next $ cleanup k _ -> next $ cleanup k @@ -169,7 +169,7 @@ startBatch (i, (MetaData m)) = case i of , keyOptions = Nothing , batchOption = NoBatch } - now <- liftIO getPOSIXTime + t <- liftIO currentVectorClock -- It would be bad if two batch mode changes used exactly -- the same timestamp, since the order of adds and removals -- of the same metadata value would then be indeterminate. @@ -178,7 +178,7 @@ startBatch (i, (MetaData m)) = case i of -- probably less expensive than cleaner methods, -- such as taking from a list of increasing timestamps. liftIO $ threadDelay 1 - next $ perform now o k + next $ perform t o k mkModMeta (f, s) | S.null s = DelMeta f Nothing | otherwise = SetMeta f s diff --git a/Logs/Activity.hs b/Logs/Activity.hs index 6f5bf0deb..d7474704e 100644 --- a/Logs/Activity.hs +++ b/Logs/Activity.hs @@ -12,8 +12,6 @@ module Logs.Activity ( lastActivities, ) where -import Data.Time.Clock.POSIX - import Annex.Common import qualified Annex.Branch import Logs @@ -24,9 +22,9 @@ data Activity = Fsck recordActivity :: Activity -> UUID -> Annex () recordActivity act uuid = do - ts <- liftIO getPOSIXTime + c <- liftIO currentVectorClock Annex.Branch.change activityLog $ - showLog show . changeLog ts uuid act . parseLog readish + showLog show . changeLog c uuid act . parseLog readish lastActivities :: Maybe Activity -> Annex (Log Activity) lastActivities wantact = parseLog onlywanted <$> Annex.Branch.get activityLog diff --git a/Logs/Chunk.hs b/Logs/Chunk.hs index b591a2a6d..0a419716b 100644 --- a/Logs/Chunk.hs +++ b/Logs/Chunk.hs @@ -32,14 +32,13 @@ import Logs.Chunk.Pure import qualified Annex import qualified Data.Map as M -import Data.Time.Clock.POSIX chunksStored :: UUID -> Key -> ChunkMethod -> ChunkCount -> Annex () chunksStored u k chunkmethod chunkcount = do - ts <- liftIO getPOSIXTime + c <- liftIO currentVectorClock config <- Annex.getGitConfig Annex.Branch.change (chunkLogFile config k) $ - showLog . changeMapLog ts (u, chunkmethod) chunkcount . parseLog + showLog . changeMapLog c (u, chunkmethod) chunkcount . parseLog chunksRemoved :: UUID -> Key -> ChunkMethod -> Annex () chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0 diff --git a/Logs/Config.hs b/Logs/Config.hs index b16a64dba..7d1576b27 100644 --- a/Logs/Config.hs +++ b/Logs/Config.hs @@ -19,7 +19,6 @@ import Logs import Logs.MapLog import qualified Annex.Branch -import Data.Time.Clock.POSIX import qualified Data.Map as M type ConfigName = String @@ -33,9 +32,9 @@ setGlobalConfig name new = do setGlobalConfig' :: ConfigName -> ConfigValue -> Annex () setGlobalConfig' name new = do - now <- liftIO getPOSIXTime + c <- liftIO currentVectorClock Annex.Branch.change configLog $ - showMapLog id id . changeMapLog now name new . parseGlobalConfig + showMapLog id id . changeMapLog c name new . parseGlobalConfig unsetGlobalConfig :: ConfigName -> Annex () unsetGlobalConfig name = do diff --git a/Logs/Difference.hs b/Logs/Difference.hs index 9817393e3..e392d3f11 100644 --- a/Logs/Difference.hs +++ b/Logs/Difference.hs @@ -12,7 +12,6 @@ module Logs.Difference ( module Logs.Difference.Pure ) where -import Data.Time.Clock.POSIX import qualified Data.Map as M import Annex.Common @@ -24,9 +23,9 @@ import Logs.Difference.Pure recordDifferences :: Differences -> UUID -> Annex () recordDifferences ds@(Differences {}) uuid = do - ts <- liftIO getPOSIXTime + c <- liftIO currentVectorClock Annex.Branch.change differenceLog $ - showLog id . changeLog ts uuid (showDifferences ds) . parseLog Just + showLog id . changeLog c uuid (showDifferences ds) . parseLog Just recordDifferences UnknownDifferences _ = return () -- Map of UUIDs that have Differences recorded. diff --git a/Logs/Group.hs b/Logs/Group.hs index 7090e7b45..b43062746 100644 --- a/Logs/Group.hs +++ b/Logs/Group.hs @@ -18,7 +18,6 @@ module Logs.Group ( import qualified Data.Map as M import qualified Data.Set as S -import Data.Time.Clock.POSIX import Annex.Common import Logs @@ -36,10 +35,10 @@ lookupGroups u = (fromMaybe S.empty . M.lookup u) . groupsByUUID <$> groupMap groupChange :: UUID -> (S.Set Group -> S.Set Group) -> Annex () groupChange uuid@(UUID _) modifier = do curr <- lookupGroups uuid - ts <- liftIO getPOSIXTime + c <- liftIO currentVectorClock Annex.Branch.change groupLog $ showLog (unwords . S.toList) . - changeLog ts uuid (modifier curr) . + changeLog c uuid (modifier curr) . parseLog (Just . S.fromList . words) -- The changed group invalidates the preferred content cache. diff --git a/Logs/Location.hs b/Logs/Location.hs index 5ead34be6..a94dc9089 100644 --- a/Logs/Location.hs +++ b/Logs/Location.hs @@ -33,6 +33,7 @@ import Logs import Logs.Presence import Annex.UUID import Annex.CatFile +import Annex.VectorClock import Git.Types (RefDate, Ref) import qualified Annex @@ -107,7 +108,10 @@ setDead key = do setDead' :: LogLine -> LogLine setDead' l = l { status = InfoDead - , date = date l + realToFrac (picosecondsToDiffTime 1) + , date = case date l of + VectorClock c -> VectorClock $ + c + realToFrac (picosecondsToDiffTime 1) + Unknown -> Unknown } {- Finds all keys that have location log information. diff --git a/Logs/MapLog.hs b/Logs/MapLog.hs index 097439ac5..7fe9e5782 100644 --- a/Logs/MapLog.hs +++ b/Logs/MapLog.hs @@ -11,20 +11,21 @@ - Licensed under the GNU GPL version 3 or higher. -} -module Logs.MapLog where - -import qualified Data.Map as M -import Data.Time.Clock.POSIX +module Logs.MapLog ( + module Logs.MapLog, + VectorClock, + currentVectorClock, +) where import Common +import Annex.VectorClock import Logs.TimeStamp import Logs.Line -data TimeStamp = Unknown | Date POSIXTime - deriving (Eq, Ord, Show) +import qualified Data.Map as M data LogEntry v = LogEntry - { changed :: TimeStamp + { changed :: VectorClock , value :: v } deriving (Eq, Show) @@ -33,8 +34,8 @@ type MapLog f v = M.Map f (LogEntry v) showMapLog :: (f -> String) -> (v -> String) -> MapLog f v -> String showMapLog fieldshower valueshower = unlines . map showpair . M.toList where - showpair (f, LogEntry (Date p) v) = - unwords [show p, fieldshower f, valueshower v] + showpair (f, LogEntry (VectorClock c) v) = + unwords [show c, fieldshower f, valueshower v] showpair (f, LogEntry Unknown v) = unwords ["0", fieldshower f, valueshower v] @@ -44,14 +45,14 @@ parseMapLog fieldparser valueparser = M.fromListWith best . mapMaybe parse . spl parse line = do let (ts, rest) = splitword line (sf, sv) = splitword rest - date <- Date <$> parsePOSIXTime ts + c <- VectorClock <$> parsePOSIXTime ts f <- fieldparser sf v <- valueparser sv - Just (f, LogEntry date v) + Just (f, LogEntry c v) splitword = separate (== ' ') -changeMapLog :: Ord f => POSIXTime -> f -> v -> MapLog f v -> MapLog f v -changeMapLog t f v = M.insert f $ LogEntry (Date t) v +changeMapLog :: Ord f => VectorClock -> f -> v -> MapLog f v -> MapLog f v +changeMapLog c f v = M.insert f $ LogEntry c v {- Only add an LogEntry if it's newer (or at least as new as) than any - existing LogEntry for a field. -} @@ -69,15 +70,11 @@ best new old | changed old > changed new = old | otherwise = new --- Unknown is oldest. -prop_TimeStamp_sane :: Bool -prop_TimeStamp_sane = Unknown < Date 1 - prop_addMapLog_sane :: Bool prop_addMapLog_sane = newWins && newestWins where - newWins = addMapLog ("foo") (LogEntry (Date 1) "new") l == l2 - newestWins = addMapLog ("foo") (LogEntry (Date 1) "newest") l2 /= l2 + newWins = addMapLog ("foo") (LogEntry (VectorClock 1) "new") l == l2 + newestWins = addMapLog ("foo") (LogEntry (VectorClock 1) "newest") l2 /= l2 - l = M.fromList [("foo", LogEntry (Date 0) "old")] - l2 = M.fromList [("foo", LogEntry (Date 1) "new")] + l = M.fromList [("foo", LogEntry (VectorClock 0) "old")] + l2 = M.fromList [("foo", LogEntry (VectorClock 1) "new")] diff --git a/Logs/MetaData.hs b/Logs/MetaData.hs index 52370d2c5..92e396541 100644 --- a/Logs/MetaData.hs +++ b/Logs/MetaData.hs @@ -36,6 +36,7 @@ module Logs.MetaData ( import Annex.Common import Types.MetaData import Annex.MetaData.StandardFields +import Annex.VectorClock import qualified Annex.Branch import qualified Annex import Logs @@ -44,7 +45,6 @@ import Logs.TimeStamp import qualified Data.Set as S import qualified Data.Map as M -import Data.Time.Clock.POSIX instance SingleValueSerializable MetaData where serialize = Types.MetaData.serialize @@ -83,26 +83,29 @@ getCurrentMetaData k = do let MetaData m = value l ts = lastchangedval l in M.map (const ts) m - lastchangedval l = S.singleton $ toMetaValue $ showts $ changed l + lastchangedval l = S.singleton $ toMetaValue $ showts $ + case changed l of + VectorClock t -> t + Unknown -> 0 showts = formatPOSIXTime "%F@%H-%M-%S" {- Adds in some metadata, which can override existing values, or unset - them, but otherwise leaves any existing metadata as-is. -} addMetaData :: Key -> MetaData -> Annex () -addMetaData k metadata = addMetaData' k metadata =<< liftIO getPOSIXTime +addMetaData k metadata = addMetaData' k metadata =<< liftIO currentVectorClock -{- Reusing the same timestamp when making changes to the metadata +{- Reusing the same VectorClock when making changes to the metadata - of multiple keys is a nice optimisation. The same metadata lines - will tend to be generated across the different log files, and so - git will be able to pack the data more efficiently. -} -addMetaData' :: Key -> MetaData -> POSIXTime -> Annex () -addMetaData' k d@(MetaData m) now +addMetaData' :: Key -> MetaData -> VectorClock -> Annex () +addMetaData' k d@(MetaData m) c | d == emptyMetaData = noop | otherwise = do config <- Annex.getGitConfig Annex.Branch.change (metaDataLogFile config k) $ showLog . simplifyLog - . S.insert (LogEntry now metadata) + . S.insert (LogEntry c metadata) . parseLog where metadata = MetaData $ M.filterWithKey (\f _ -> not (isLastChangedField f)) m diff --git a/Logs/Multicast.hs b/Logs/Multicast.hs index 386899fdf..8deb2800b 100644 --- a/Logs/Multicast.hs +++ b/Logs/Multicast.hs @@ -11,8 +11,6 @@ module Logs.Multicast ( knownFingerPrints, ) where -import Data.Time.Clock.POSIX - import Annex.Common import qualified Annex.Branch import Logs @@ -25,9 +23,9 @@ newtype Fingerprint = Fingerprint String recordFingerprint :: Fingerprint -> UUID -> Annex () recordFingerprint fp uuid = do - ts <- liftIO getPOSIXTime + c <- liftIO currentVectorClock Annex.Branch.change multicastLog $ - showLog show . changeLog ts uuid fp . parseLog readish + showLog show . changeLog c uuid fp . parseLog readish knownFingerPrints :: Annex (M.Map UUID Fingerprint) knownFingerPrints = simpleMap . parseLog readish <$> Annex.Branch.get activityLog diff --git a/Logs/PreferredContent/Raw.hs b/Logs/PreferredContent/Raw.hs index e23b09c55..8df5edd43 100644 --- a/Logs/PreferredContent/Raw.hs +++ b/Logs/PreferredContent/Raw.hs @@ -7,9 +7,6 @@ module Logs.PreferredContent.Raw where -import qualified Data.Map as M -import Data.Time.Clock.POSIX - import Annex.Common import qualified Annex.Branch import qualified Annex @@ -19,6 +16,8 @@ import Logs.MapLog import Types.StandardGroups import Types.Group +import qualified Data.Map as M + {- Changes the preferred content configuration of a remote. -} preferredContentSet :: UUID -> PreferredContentExpression -> Annex () preferredContentSet = setLog preferredContentLog @@ -28,10 +27,10 @@ requiredContentSet = setLog requiredContentLog setLog :: FilePath -> UUID -> PreferredContentExpression -> Annex () setLog logfile uuid@(UUID _) val = do - ts <- liftIO getPOSIXTime + c <- liftIO currentVectorClock Annex.Branch.change logfile $ showLog id - . changeLog ts uuid val + . changeLog c uuid val . parseLog Just Annex.changeState $ \s -> s { Annex.preferredcontentmap = Nothing @@ -42,10 +41,10 @@ setLog _ NoUUID _ = error "unknown UUID; cannot modify" {- Changes the preferred content configuration of a group. -} groupPreferredContentSet :: Group -> PreferredContentExpression -> Annex () groupPreferredContentSet g val = do - ts <- liftIO getPOSIXTime + c <- liftIO currentVectorClock Annex.Branch.change groupPreferredContentLog $ showMapLog id id - . changeMapLog ts g val + . changeMapLog c g val . parseMapLog Just Just Annex.changeState $ \s -> s { Annex.preferredcontentmap = Nothing } diff --git a/Logs/Presence.hs b/Logs/Presence.hs index 29b786e5e..382a5a302 100644 --- a/Logs/Presence.hs +++ b/Logs/Presence.hs @@ -22,10 +22,9 @@ module Logs.Presence ( historicalLogInfo, ) where -import Data.Time.Clock.POSIX - import Logs.Presence.Pure as X import Annex.Common +import Annex.VectorClock import qualified Annex.Branch import Git.Types (RefDate) @@ -49,11 +48,11 @@ maybeAddLog file line = Annex.Branch.maybeChange file $ \s -> do readLog :: FilePath -> Annex [LogLine] readLog = parseLog <$$> Annex.Branch.get -{- Generates a new LogLine with the current date. -} +{- Generates a new LogLine with the current time. -} logNow :: LogStatus -> String -> Annex LogLine logNow s i = do - now <- liftIO getPOSIXTime - return $ LogLine now s i + c <- liftIO currentVectorClock + return $ LogLine c s i {- Reads a log and returns only the info that is still in effect. -} currentLogInfo :: FilePath -> Annex [String] diff --git a/Logs/Presence/Pure.hs b/Logs/Presence/Pure.hs index 8abf5e52b..03cbdcdc1 100644 --- a/Logs/Presence/Pure.hs +++ b/Logs/Presence/Pure.hs @@ -7,19 +7,19 @@ module Logs.Presence.Pure where -import Data.Time.Clock.POSIX -import qualified Data.Map as M - import Annex.Common +import Annex.VectorClock import Logs.TimeStamp import Logs.Line import Utility.QuickCheck -data LogLine = LogLine { - date :: POSIXTime, - status :: LogStatus, - info :: String -} deriving (Eq, Show) +import qualified Data.Map as M + +data LogLine = LogLine + { date :: VectorClock + , status :: LogStatus + , info :: String + } deriving (Eq, Show) data LogStatus = InfoPresent | InfoMissing | InfoDead deriving (Eq, Show, Bounded, Enum) @@ -29,7 +29,7 @@ parseLog :: String -> [LogLine] parseLog = mapMaybe parseline . splitLines where parseline l = LogLine - <$> parsePOSIXTime d + <$> (VectorClock <$> parsePOSIXTime d) <*> parseStatus s <*> pure rest where diff --git a/Logs/Remote.hs b/Logs/Remote.hs index 1eb1c41b1..47a339a5f 100644 --- a/Logs/Remote.hs +++ b/Logs/Remote.hs @@ -18,22 +18,21 @@ module Logs.Remote ( prop_parse_show_Config, ) where -import qualified Data.Map as M -import Data.Time.Clock.POSIX -import Data.Char - import Annex.Common import qualified Annex.Branch import Types.Remote import Logs import Logs.UUIDBased +import qualified Data.Map as M +import Data.Char + {- Adds or updates a remote's config in the log. -} configSet :: UUID -> RemoteConfig -> Annex () -configSet u c = do - ts <- liftIO getPOSIXTime +configSet u cfg = do + c <- liftIO currentVectorClock Annex.Branch.change remoteLog $ - showLog showConfig . changeLog ts u c . parseLog parseConfig + showLog showConfig . changeLog c u cfg . parseLog parseConfig {- Map of remotes by uuid containing key/value config maps. -} readRemoteLog :: Annex (M.Map UUID RemoteConfig) diff --git a/Logs/RemoteState.hs b/Logs/RemoteState.hs index ff4979f9c..17d084f78 100644 --- a/Logs/RemoteState.hs +++ b/Logs/RemoteState.hs @@ -17,16 +17,15 @@ import qualified Annex.Branch import qualified Annex import qualified Data.Map as M -import Data.Time.Clock.POSIX type RemoteState = String setRemoteState :: UUID -> Key -> RemoteState -> Annex () setRemoteState u k s = do - ts <- liftIO getPOSIXTime + c <- liftIO currentVectorClock config <- Annex.getGitConfig Annex.Branch.change (remoteStateLogFile config k) $ - showLogNew id . changeLog ts u s . parseLogNew Just + showLogNew id . changeLog c u s . parseLogNew Just getRemoteState :: UUID -> Key -> Annex (Maybe RemoteState) getRemoteState u k = do diff --git a/Logs/Schedule.hs b/Logs/Schedule.hs index 844781d50..aea0df223 100644 --- a/Logs/Schedule.hs +++ b/Logs/Schedule.hs @@ -19,7 +19,6 @@ module Logs.Schedule ( import qualified Data.Map as M import qualified Data.Set as S -import Data.Time.Clock.POSIX import Data.Time.LocalTime import Annex.Common @@ -31,9 +30,9 @@ import Utility.Tmp scheduleSet :: UUID -> [ScheduledActivity] -> Annex () scheduleSet uuid@(UUID _) activities = do - ts <- liftIO getPOSIXTime + c <- liftIO currentVectorClock Annex.Branch.change scheduleLog $ - showLog id . changeLog ts uuid val . parseLog Just + showLog id . changeLog c uuid val . parseLog Just where val = fromScheduledActivities activities scheduleSet NoUUID _ = error "unknown UUID; cannot modify" diff --git a/Logs/SingleValue.hs b/Logs/SingleValue.hs index 201e205eb..24242c83f 100644 --- a/Logs/SingleValue.hs +++ b/Logs/SingleValue.hs @@ -17,16 +17,16 @@ import Annex.Common import qualified Annex.Branch import Logs.TimeStamp import Logs.Line +import Annex.VectorClock import qualified Data.Set as S -import Data.Time.Clock.POSIX class SingleValueSerializable v where serialize :: v -> String deserialize :: String -> Maybe v data LogEntry v = LogEntry - { changed :: POSIXTime + { changed :: VectorClock , value :: v } deriving (Eq, Show, Ord) @@ -42,9 +42,9 @@ parseLog = S.fromList . mapMaybe parse . splitLines where parse line = do let (ts, s) = splitword line - date <- parsePOSIXTime ts + c <- VectorClock <$> parsePOSIXTime ts v <- deserialize s - Just (LogEntry date v) + Just (LogEntry c v) splitword = separate (== ' ') newestValue :: Log v -> Maybe v @@ -60,6 +60,6 @@ getLog = newestValue <$$> readLog setLog :: (SingleValueSerializable v) => FilePath -> v -> Annex () setLog f v = do - now <- liftIO getPOSIXTime - let ent = LogEntry now v + c <- liftIO currentVectorClock + let ent = LogEntry c v Annex.Branch.change f $ \_old -> showLog (S.singleton ent) diff --git a/Logs/Transitions.hs b/Logs/Transitions.hs index 04f9824b1..79acb87dd 100644 --- a/Logs/Transitions.hs +++ b/Logs/Transitions.hs @@ -14,13 +14,13 @@ module Logs.Transitions where -import Data.Time.Clock.POSIX -import qualified Data.Set as S - import Annex.Common +import Annex.VectorClock import Logs.TimeStamp import Logs.Line +import qualified Data.Set as S + transitionsLog :: FilePath transitionsLog = "transitions.log" @@ -30,7 +30,7 @@ data Transition deriving (Show, Ord, Eq, Read) data TransitionLine = TransitionLine - { transitionStarted :: POSIXTime + { transitionStarted :: VectorClock , transition :: Transition } deriving (Show, Ord, Eq) @@ -43,8 +43,8 @@ describeTransition ForgetDeadRemotes = "forget dead remotes" noTransitions :: Transitions noTransitions = S.empty -addTransition :: POSIXTime -> Transition -> Transitions -> Transitions -addTransition ts t = S.insert $ TransitionLine ts t +addTransition :: VectorClock -> Transition -> Transitions -> Transitions +addTransition c t = S.insert $ TransitionLine c t showTransitions :: Transitions -> String showTransitions = unlines . map showTransitionLine . S.elems @@ -67,7 +67,7 @@ showTransitionLine (TransitionLine ts t) = unwords [show t, show ts] parseTransitionLine :: String -> Maybe TransitionLine parseTransitionLine s = TransitionLine - <$> parsePOSIXTime ds + <$> (VectorClock <$> parsePOSIXTime ds) <*> readish ts where ws = words s diff --git a/Logs/Trust/Basic.hs b/Logs/Trust/Basic.hs index da542d472..850fcc95f 100644 --- a/Logs/Trust/Basic.hs +++ b/Logs/Trust/Basic.hs @@ -11,8 +11,6 @@ module Logs.Trust.Basic ( trustMapRaw, ) where -import Data.Time.Clock.POSIX - import Annex.Common import Types.TrustLevel import qualified Annex.Branch @@ -24,10 +22,10 @@ import Logs.Trust.Pure as X {- Changes the trust level for a uuid in the trustLog. -} trustSet :: UUID -> TrustLevel -> Annex () trustSet uuid@(UUID _) level = do - ts <- liftIO getPOSIXTime + c <- liftIO currentVectorClock Annex.Branch.change trustLog $ showLog showTrustLog . - changeLog ts uuid level . + changeLog c uuid level . parseLog (Just . parseTrustLog) Annex.changeState $ \s -> s { Annex.trustmap = Nothing } trustSet NoUUID _ = error "unknown UUID; cannot modify" diff --git a/Logs/UUID.hs b/Logs/UUID.hs index 4c84d10bd..1160dfcda 100644 --- a/Logs/UUID.hs +++ b/Logs/UUID.hs @@ -21,23 +21,23 @@ module Logs.UUID ( uuidMapLoad ) where -import qualified Data.Map as M -import Data.Time.Clock.POSIX - import Types.UUID import Annex.Common +import Annex.VectorClock import qualified Annex import qualified Annex.Branch import Logs import Logs.UUIDBased import qualified Annex.UUID +import qualified Data.Map as M + {- Records a description for a uuid in the log. -} describeUUID :: UUID -> String -> Annex () describeUUID uuid desc = do - ts <- liftIO getPOSIXTime + c <- liftIO currentVectorClock Annex.Branch.change uuidLog $ - showLog id . changeLog ts uuid desc . fixBadUUID . parseLog Just + showLog id . changeLog c uuid desc . fixBadUUID . parseLog Just {- Temporarily here to fix badly formatted uuid logs generated by - versions 3.20111105 and 3.20111025. @@ -52,7 +52,7 @@ fixBadUUID :: Log String -> Log String fixBadUUID = M.fromList . map fixup . M.toList where fixup (k, v) - | isbad = (fixeduuid, LogEntry (Date $ newertime v) fixedvalue) + | isbad = (fixeduuid, LogEntry (newertime v) fixedvalue) | otherwise = (k, v) where kuuid = fromUUID k @@ -63,8 +63,8 @@ fixBadUUID = M.fromList . map fixup . M.toList fixedvalue = unwords $ kuuid: Prelude.init ws -- For the fixed line to take precidence, it should be -- slightly newer, but only slightly. - newertime (LogEntry (Date d) _) = d + minimumPOSIXTimeSlice - newertime (LogEntry Unknown _) = minimumPOSIXTimeSlice + newertime (LogEntry (VectorClock c) _) = VectorClock (c + minimumPOSIXTimeSlice) + newertime (LogEntry Unknown _) = VectorClock minimumPOSIXTimeSlice minimumPOSIXTimeSlice = 0.000001 isuuid s = length s == 36 && length (splitc '-' s) == 5 diff --git a/Logs/UUIDBased.hs b/Logs/UUIDBased.hs index 97ecd1011..fd1cd7c2d 100644 --- a/Logs/UUIDBased.hs +++ b/Logs/UUIDBased.hs @@ -17,7 +17,8 @@ module Logs.UUIDBased ( Log, LogEntry(..), - TimeStamp(..), + VectorClock, + currentVectorClock, parseLog, parseLogNew, parseLogWithUUID, @@ -29,10 +30,10 @@ module Logs.UUIDBased ( ) where import qualified Data.Map as M -import Data.Time.Clock.POSIX import Common import Types.UUID +import Annex.VectorClock import Logs.MapLog import Logs.TimeStamp import Logs.Line @@ -42,8 +43,8 @@ type Log v = MapLog UUID v showLog :: (v -> String) -> Log v -> String showLog shower = unlines . map showpair . M.toList where - showpair (k, LogEntry (Date p) v) = - unwords [fromUUID k, shower v, tskey ++ show p] + showpair (k, LogEntry (VectorClock c) v) = + unwords [fromUUID k, shower v, tskey ++ show c] showpair (k, LogEntry Unknown v) = unwords [fromUUID k, shower v] @@ -75,7 +76,7 @@ parseLogWithUUID parser = M.fromListWith best . mapMaybe parse . splitLines | otherwise = drop 1 $ beginning ws pdate s = case parsePOSIXTime s of Nothing -> Unknown - Just d -> Date d + Just d -> VectorClock d showLogNew :: (v -> String) -> Log v -> String showLogNew = showMapLog fromUUID @@ -83,7 +84,7 @@ showLogNew = showMapLog fromUUID parseLogNew :: (String -> Maybe v) -> String -> Log v parseLogNew = parseMapLog (Just . toUUID) -changeLog :: POSIXTime -> UUID -> v -> Log v -> Log v +changeLog :: VectorClock -> UUID -> v -> Log v -> Log v changeLog = changeMapLog addLog :: UUID -> LogEntry v -> Log v -> Log v @@ -76,6 +76,7 @@ import qualified Annex.Init import qualified Annex.CatFile import qualified Annex.Path import qualified Annex.AdjustedBranch +import qualified Annex.VectorClock import qualified Annex.View import qualified Annex.View.ViewedFile import qualified Logs.View @@ -176,7 +177,7 @@ properties = localOption (QuickCheckTests 1000) $ testGroup "QuickCheck" , testProperty "prop_cost_sane" Config.Cost.prop_cost_sane , testProperty "prop_matcher_sane" Utility.Matcher.prop_matcher_sane , testProperty "prop_HmacSha1WithCipher_sane" Crypto.prop_HmacSha1WithCipher_sane - , testProperty "prop_TimeStamp_sane" Logs.MapLog.prop_TimeStamp_sane + , testProperty "prop_VectorClock_sane" Annex.VectorClock.prop_VectorClock_sane , testProperty "prop_addMapLog_sane" Logs.MapLog.prop_addMapLog_sane , testProperty "prop_verifiable_sane" Utility.Verifiable.prop_verifiable_sane , testProperty "prop_segment_regressionTest" Utility.Misc.prop_segment_regressionTest diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 99f6c9076..c7d0f10da 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1467,6 +1467,19 @@ These environment variables are used by git-annex when set: Usually it's better to configure any desired options through your ~/.ssh/config file, or by setting `annex.ssh-options`. +* `GIT_ANNEX_VECTOR_CLOCK` + + Normally git-annex timestamps lines in the log files committed to the + git-annex branch. Setting this environment variable to a number + will make git-annex use that rather than the current number of seconds + since the UNIX epoch. Note that decimal seconds are supported. + + This is only provided for advanced users who either have a better way to + tell which commit is current than the local clock, or who need to avoid + embedding timestamps for policy reasons. Misuse of this environment + variable can confuse git-annex's book-keeping, sometimes in ways that + `git annex fsck` is unable to repair. + Some special remotes use additional environment variables for authentication etc. For example, `AWS_ACCESS_KEY_ID` and `GIT_ANNEX_P2P_AUTHTOKEN`. See special remote documentation. diff --git a/git-annex.cabal b/git-annex.cabal index 4b0c9cdd2..ad0ef9271 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -536,6 +536,7 @@ Executable git-annex Annex.UpdateInstead Annex.UUID Annex.Url + Annex.VectorClock Annex.VariantFile Annex.Version Annex.View |