diff options
Diffstat (limited to 'Logs/MetaData.hs')
-rw-r--r-- | Logs/MetaData.hs | 137 |
1 files changed, 137 insertions, 0 deletions
diff --git a/Logs/MetaData.hs b/Logs/MetaData.hs new file mode 100644 index 000000000..77c1b56a5 --- /dev/null +++ b/Logs/MetaData.hs @@ -0,0 +1,137 @@ +{- git-annex general metadata storage log + - + - A line of the log will look like "timestamp field [+-]value [...]" + - + - Note that unset values are preserved. Consider this case: + - + - We have: + - + - 100 foo +x + - 200 foo -x + - + - An unmerged remote has: + - + - 150 foo +x + - + - After union merge, because the foo -x was preserved, we know that + - after the other remote redundantly set foo +x, it was unset, + - and so foo currently has no value. + - + - + - Copyright 2014 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# OPTIONS_GHC -fno-warn-orphans #-} +{-# LANGUAGE CPP #-} + +module Logs.MetaData ( + getCurrentMetaData, + getMetaData, + addMetaData, + addMetaData', + currentMetaData, +) where + +import Common.Annex +import Types.MetaData +import qualified Annex.Branch +import Logs +import Logs.SingleValue + +import qualified Data.Set as S +import Data.Time.Clock.POSIX + +instance SingleValueSerializable MetaData where + serialize = Types.MetaData.serialize + deserialize = Types.MetaData.deserialize + +getMetaData :: Key -> Annex (Log MetaData) +getMetaData = readLog . metaDataLogFile + +{- Go through the log from oldest to newest, and combine it all + - into a single MetaData representing the current state. -} +getCurrentMetaData :: Key -> Annex MetaData +getCurrentMetaData = currentMetaData . collect <$$> getMetaData + where + collect = foldl' unionMetaData newMetaData . map value . S.toAscList + +{- Adds in some metadata, which can override existing values, or unset + - them, but otherwise leaves any existing metadata as-is. -} +addMetaData :: Key -> MetaData -> Annex () +addMetaData k metadata = addMetaData' k metadata =<< liftIO getPOSIXTime + +{- Reusing the same timestamp when making changes to the metadata + - of multiple keys is a nice optimisation. The same metadata lines + - will tend to be generated across the different log files, and so + - git will be able to pack the data more efficiently. -} +addMetaData' :: Key -> MetaData -> POSIXTime -> Annex () +addMetaData' k metadata now = Annex.Branch.change (metaDataLogFile k) $ + showLog . simplifyLog + . S.insert (LogEntry now metadata) + . parseLog + +{- Simplify a log, removing historical values that are no longer + - needed. + - + - This is not as simple as just making a single log line with the newest + - state of all metadata. Consider this case: + - + - We have: + - + - 100 foo +x bar +y + - 200 foo -x + - + - An unmerged remote has: + - + - 150 bar -y baz +w + - + - If what we have were simplified to "200 foo -x bar +y" then when the line + - from the remote became available, it would be older than the simplified + - line, and its change to bar would not take effect. That is wrong. + - + - Instead, simplify it to: + - + - 100 bar +y + - 200 foo -x + - + - (Note that this ends up with the same number of lines as the + - unsimplified version, so there's really no point in updating + - the log to this version. Doing so would only add data to git, + - with little benefit.) + - + - Now merging with the remote yields: + - + - 100 bar +y + - 150 bar -y baz +w + - 200 foo -x + - + - Simplifying again: + - + - 150 bar +z baz +w + - 200 foo -x + -} +simplifyLog :: Log MetaData -> Log MetaData +simplifyLog s = case sl of + (newest:rest) -> + let sl' = go [newest] (value newest) rest + in if length sl' < length sl + then S.fromList sl' + else s + _ -> s + where +#if MIN_VERSION_containers(0,5,0) + sl = S.toDescList s +#else + sl = reverse (S.toAscList s) +#endif + + go c _ [] = c + go c newer (l:ls) + | unique == newMetaData = go c newer ls + | otherwise = go (l { value = unique } : c) + (unionMetaData unique newer) ls + where + older = value l + unique = older `differenceMetaData` newer |