summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-02-12 21:12:22 -0400
committerGravatar Joey Hess <joey@kitenet.net>2014-02-12 21:30:33 -0400
commitcb0dad5172b743679d90c7fd6e490d4927ea5a76 (patch)
tree0f487cc027688a9e47749c9030099ed877e467a2
parent41e5f8dfe79d6db8b0bd1492d8f28caf6b24ef5f (diff)
add metadata command to get/set metadata
Adds metadata log, and command. Note that unsetting field values seems to currently be broken. And in general this has had all of 2 minutes worth of testing. This commit was sponsored by Julien Lefrique.
-rw-r--r--Annex/Branch/Transitions.hs2
-rw-r--r--CmdLine/GitAnnex.hs2
-rw-r--r--Command/MetaData.hs73
-rw-r--r--Logs.hs23
-rw-r--r--Logs/MetaData.hs135
-rw-r--r--Types/MetaData.hs57
-rw-r--r--doc/design/metadata.mdwn14
-rw-r--r--doc/git-annex.mdwn17
-rw-r--r--doc/internals.mdwn21
9 files changed, 312 insertions, 32 deletions
diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs
index 95d47257a..42c61d96a 100644
--- a/Annex/Branch/Transitions.hs
+++ b/Annex/Branch/Transitions.hs
@@ -41,7 +41,7 @@ dropDead f content trustmap = case getLogVariety f of
in if null newlog
then RemoveFile
else ChangeFile $ Presence.showLog newlog
- Just SingleValueLog -> PreserveFile
+ Just OtherLog -> PreserveFile
Nothing -> PreserveFile
dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String
diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs
index b25082963..a67c6be29 100644
--- a/CmdLine/GitAnnex.hs
+++ b/CmdLine/GitAnnex.hs
@@ -26,6 +26,7 @@ import qualified Command.DropKey
import qualified Command.TransferKey
import qualified Command.TransferKeys
import qualified Command.ReKey
+import qualified Command.MetaData
import qualified Command.Reinject
import qualified Command.Fix
import qualified Command.Init
@@ -134,6 +135,7 @@ cmds = concat
, Command.TransferKey.def
, Command.TransferKeys.def
, Command.ReKey.def
+ , Command.MetaData.def
, Command.Fix.def
, Command.Fsck.def
, Command.Repair.def
diff --git a/Command/MetaData.hs b/Command/MetaData.hs
new file mode 100644
index 000000000..f2c4abcea
--- /dev/null
+++ b/Command/MetaData.hs
@@ -0,0 +1,73 @@
+{- git-annex command
+ -
+ - Copyright 2014 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Command.MetaData where
+
+import Common.Annex
+import Command
+import Logs.MetaData
+import Types.MetaData
+
+import qualified Data.Set as S
+
+def :: [Command]
+def = [command "metadata" (paramPair paramFile (paramRepeating "FIELD[+-]=VALUE")) seek
+ SectionUtility "sets metadata of a file"]
+
+seek :: CommandSeek
+seek = withWords start
+
+start :: [String] -> CommandStart
+start (file:settings) = ifAnnexed file
+ go
+ (error $ "not an annexed file, so cannot add metadata: " ++ file)
+ where
+ go (k, _b) = do
+ showStart "metadata" file
+ next $ perform k (map parse settings)
+start _ = error "specify a file and the metadata to set"
+
+perform :: Key -> [Action] -> CommandPerform
+perform k actions = do
+ m <- getCurrentMetaData k
+ if null actions
+ then next $ cleanup m
+ else do
+ let m' = foldr apply m actions
+ addMetaData k m'
+ next $ cleanup m'
+
+cleanup :: MetaData -> CommandCleanup
+cleanup m = do
+ showLongNote $ unlines $ concatMap showmeta $ fromMetaData $ currentMetaData m
+ return True
+ where
+ showmeta (f, vs) = map (\v -> fromMetaField f ++ "=" ++ fromMetaValue v) $ S.toList vs
+
+data Action
+ = AddMeta MetaField MetaValue
+ | DelMeta MetaField MetaValue
+ | SetMeta MetaField MetaValue
+
+parse :: String -> Action
+parse p = case lastMaybe f of
+ Just '+' -> AddMeta (mkf f') v
+ Just '-' -> DelMeta (mkf f') v
+ _ -> SetMeta (mkf f) v
+ where
+ (f, sv) = separate (== '=') p
+ f' = beginning f
+ v = toMetaValue sv
+ mkf fld = fromMaybe (badfield fld) (toMetaField fld)
+ badfield fld = error $ "Illegal metadata field name, \"" ++ fld ++ "\""
+
+apply :: Action -> MetaData -> MetaData
+apply (AddMeta f v) m = updateMetaData f v m
+apply (DelMeta f oldv) m = updateMetaData f (unsetMetaValue oldv) m
+apply (SetMeta f v) m = updateMetaData f v $
+ foldr (updateMetaData f) m $
+ map unsetMetaValue $ S.toList $ currentMetaDataValues f m
diff --git a/Logs.hs b/Logs.hs
index 1e7a8e8c4..21908a9cf 100644
--- a/Logs.hs
+++ b/Logs.hs
@@ -1,6 +1,6 @@
{- git-annex log file names
-
- - Copyright 2013 Joey Hess <joey@kitenet.net>
+ - Copyright 2013-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -15,7 +15,7 @@ data LogVariety
= UUIDBasedLog
| NewUUIDBasedLog
| PresenceLog Key
- | SingleValueLog
+ | OtherLog
deriving (Show)
{- Converts a path from the git-annex branch into one of the varieties
@@ -24,7 +24,7 @@ getLogVariety :: FilePath -> Maybe LogVariety
getLogVariety f
| f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
| isRemoteStateLog f = Just NewUUIDBasedLog
- | f == numcopiesLog = Just SingleValueLog
+ | isMetaDataLog f || f == numcopiesLog = Just OtherLog
| otherwise = PresenceLog <$> firstJust (presenceLogs f)
{- All the uuid-based logs stored in the top of the git-annex branch. -}
@@ -119,6 +119,16 @@ remoteStateLogExt = ".log.rmt"
isRemoteStateLog :: FilePath -> Bool
isRemoteStateLog path = remoteStateLogExt `isSuffixOf` path
+{- The filename of the metadata log for a given key. -}
+metaDataLogFile :: Key -> FilePath
+metaDataLogFile key = hashDirLower key </> keyFile key ++ metaDataLogExt
+
+metaDataLogExt :: String
+metaDataLogExt = ".log.met"
+
+isMetaDataLog :: FilePath -> Bool
+isMetaDataLog path = metaDataLogExt `isSuffixOf` path
+
prop_logs_sane :: Key -> Bool
prop_logs_sane dummykey = and
[ isNothing (getLogVariety "unknown")
@@ -126,7 +136,8 @@ prop_logs_sane dummykey = and
, expect isPresenceLog (getLogVariety $ locationLogFile dummykey)
, expect isPresenceLog (getLogVariety $ urlLogFile dummykey)
, expect isNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey)
- , expect isSingleValueLog (getLogVariety $ numcopiesLog)
+ , expect isOtherLog (getLogVariety $ metaDataLogFile dummykey)
+ , expect isOtherLog (getLogVariety $ numcopiesLog)
]
where
expect = maybe False
@@ -136,5 +147,5 @@ prop_logs_sane dummykey = and
isNewUUIDBasedLog _ = False
isPresenceLog (PresenceLog k) = k == dummykey
isPresenceLog _ = False
- isSingleValueLog SingleValueLog = True
- isSingleValueLog _ = False
+ isOtherLog OtherLog = True
+ isOtherLog _ = False
diff --git a/Logs/MetaData.hs b/Logs/MetaData.hs
new file mode 100644
index 000000000..6f7f4154a
--- /dev/null
+++ b/Logs/MetaData.hs
@@ -0,0 +1,135 @@
+{- git-annex general metadata storage log
+ -
+ - A line of the log will look like "timestamp field [+-]value [...]"
+ -
+ - Note that unset values are preserved. Consider this case:
+ -
+ - We have:
+ -
+ - 100 foo +x
+ - 200 foo -x
+ -
+ - An unmerged remote has:
+ -
+ - 150 foo +x
+ -
+ - After union merge, because the foo -x was preserved, we know that
+ - after the other remote redundantly set foo +x, it was unset,
+ - and so foo currently has no value.
+ -
+ -
+ - Copyright 2014 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+module Logs.MetaData (
+ getCurrentMetaData,
+ getMetaData,
+ setMetaData,
+ unsetMetaData,
+ addMetaData,
+ currentMetaData,
+) where
+
+import Common.Annex
+import Types.MetaData
+import qualified Annex.Branch
+import Logs
+import Logs.SingleValue
+
+import qualified Data.Set as S
+import Data.Time.Clock.POSIX
+
+instance SingleValueSerializable MetaData where
+ serialize = Types.MetaData.serialize
+ deserialize = Types.MetaData.deserialize
+
+getMetaData :: Key -> Annex (Log MetaData)
+getMetaData = readLog . metaDataLogFile
+
+{- Go through the log from oldest to newest, and combine it all
+ - into a single MetaData representing the current state. -}
+getCurrentMetaData :: Key -> Annex MetaData
+getCurrentMetaData = currentMetaData . collect <$$> getMetaData
+ where
+ collect = foldl' unionMetaData newMetaData . map value . S.toAscList
+
+setMetaData :: Key -> MetaField -> String -> Annex ()
+setMetaData = setMetaData' True
+
+unsetMetaData :: Key -> MetaField -> String -> Annex ()
+unsetMetaData = setMetaData' False
+
+setMetaData' :: Bool -> Key -> MetaField -> String -> Annex ()
+setMetaData' isset k field s = addMetaData k $
+ updateMetaData field (mkMetaValue (CurrentlySet isset) s) newMetaData
+
+{- Adds in some metadata, which can override existing values, or unset
+ - them, but otherwise leaves any existing metadata as-is. -}
+addMetaData :: Key -> MetaData -> Annex ()
+addMetaData k metadata = do
+ now <- liftIO getPOSIXTime
+ Annex.Branch.change (metaDataLogFile k) $
+ showLog . simplifyLog
+ . S.insert (LogEntry now metadata)
+ . parseLog
+
+{- Simplify a log, removing historical values that are no longer
+ - needed.
+ -
+ - This is not as simple as just making a single log line with the newest
+ - state of all metadata. Consider this case:
+ -
+ - We have:
+ -
+ - 100 foo +x bar +y
+ - 200 foo -x
+ -
+ - An unmerged remote has:
+ -
+ - 150 bar +z baz +w
+ -
+ - If what we have were simplified to "200 foo -x bar +y" then when the line
+ - from the remote became available, it would be older than the simplified
+ - line, and its change to bar would not take effect. That is wrong.
+ -
+ - Instead, simplify it to: (this simpliciation is optional)
+ -
+ - 100 bar +y (100 foo +x bar +y)
+ - 200 foo -x
+ -
+ - Now merging with the remote yields:
+ -
+ - 100 bar +y (100 foo +x bar +y)
+ - 150 bar +z baz +w
+ - 200 foo -x
+ -
+ - Simplifying again:
+ -
+ - 150 bar +z baz +w
+ - 200 foo -x
+ -
+ - In practice, there is little benefit to making simplications to lines
+ - that only remove some values, while leaving others on the line.
+ - Since lines are kept in git, that likely increases the size of the
+ - git repo (depending on compression), rather than saving any space.
+ -
+ - So, the only simplication that is actually done is to throw out an
+ - old line when all the values in it have been overridden by lines that
+ - came before
+ -}
+simplifyLog :: Log MetaData -> Log MetaData
+simplifyLog s = case S.toDescList s of
+ (newest:rest) -> S.fromList $ go [newest] (value newest) rest
+ _ -> s
+ where
+ go c _ [] = c
+ go c newer (l:ls)
+ | older `hasUniqueMetaData` newer =
+ go (l:c) (unionMetaData older newer) ls
+ | otherwise = go c newer ls
+ where
+ older = value l
diff --git a/Types/MetaData.hs b/Types/MetaData.hs
index ee6ba66a0..a8be9231d 100644
--- a/Types/MetaData.hs
+++ b/Types/MetaData.hs
@@ -16,11 +16,16 @@ module Types.MetaData (
toMetaField,
fromMetaField,
toMetaValue,
- toMetaValue',
+ mkMetaValue,
+ unsetMetaValue,
fromMetaValue,
+ fromMetaData,
newMetaData,
updateMetaData,
- getCurrentMetaData,
+ unionMetaData,
+ hasUniqueMetaData,
+ currentMetaData,
+ currentMetaDataValues,
getAllMetaData,
serialize,
deserialize,
@@ -37,7 +42,7 @@ import qualified Data.Map as M
import Data.Char
newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue))
- deriving (Show, Eq)
+ deriving (Show, Eq, Ord)
{- A metadata value can be currently be set (True), or may have been
- set before and we're remembering it no longer is (False). -}
@@ -118,8 +123,11 @@ legalField f
toMetaValue :: String -> MetaValue
toMetaValue = MetaValue (CurrentlySet True)
-toMetaValue' :: CurrentlySet -> String -> MetaValue
-toMetaValue' = MetaValue
+mkMetaValue :: CurrentlySet -> String -> MetaValue
+mkMetaValue = MetaValue
+
+unsetMetaValue :: MetaValue -> MetaValue
+unsetMetaValue (MetaValue _ s) = MetaValue (CurrentlySet False) s
fromMetaField :: MetaField -> String
fromMetaField (MetaField f) = f
@@ -127,6 +135,9 @@ fromMetaField (MetaField f) = f
fromMetaValue :: MetaValue -> String
fromMetaValue (MetaValue _ f) = f
+fromMetaData :: MetaData -> [(MetaField, S.Set MetaValue)]
+fromMetaData (MetaData m) = M.toList m
+
newMetaData :: MetaData
newMetaData = MetaData M.empty
@@ -136,13 +147,38 @@ updateMetaData :: MetaField -> MetaValue -> MetaData -> MetaData
updateMetaData f v (MetaData m) = MetaData $
M.insertWith' S.union f (S.singleton v) m
-{- Gets only currently set values -}
-getCurrentMetaData :: MetaField -> MetaData -> S.Set MetaValue
-getCurrentMetaData f m = S.filter isSet (getAllMetaData f m)
+{- New metadata overrides old._-}
+unionMetaData :: MetaData -> MetaData -> MetaData
+unionMetaData (MetaData old) (MetaData new) = MetaData $
+ M.unionWith S.union new old
+
+{- Checks if m contains any fields with values that are not
+ - the same in comparewith. Note that unset and set values are
+ - considered to be the same, so if m sets a value and comparewith
+ - unsets it, m is not unique. However, if m changes the value,
+ - or adds a new value, it is unique. -}
+hasUniqueMetaData :: MetaData -> MetaData -> Bool
+hasUniqueMetaData (MetaData comparewith) (MetaData m) =
+ any uniquefield (M.toList m)
+ where
+ uniquefield :: (MetaField, S.Set MetaValue) -> Bool
+ uniquefield (f, v) = maybe True (uniquevalue v) (M.lookup f comparewith)
+ uniquevalue v1 v2 = not $ S.null $ S.difference v1 v2
isSet :: MetaValue -> Bool
isSet (MetaValue (CurrentlySet isset) _) = isset
+{- Gets only currently set values -}
+currentMetaDataValues :: MetaField -> MetaData -> S.Set MetaValue
+currentMetaDataValues f m = S.filter isSet (getAllMetaData f m)
+
+currentMetaData :: MetaData -> MetaData
+currentMetaData (MetaData m) = removeEmptyFields $ MetaData $
+ M.map (S.filter isSet) m
+
+removeEmptyFields :: MetaData -> MetaData
+removeEmptyFields (MetaData m) = MetaData $ M.filter (not . S.null) m
+
{- Gets currently set values, but also values that have been unset. -}
getAllMetaData :: MetaField -> MetaData -> S.Set MetaValue
getAllMetaData f (MetaData m) = fromMaybe S.empty (M.lookup f m)
@@ -164,7 +200,7 @@ instance Arbitrary MetaField where
prop_updateMetaData_sane :: MetaData -> MetaField -> MetaValue -> Bool
prop_updateMetaData_sane m f v = and
[ S.member v $ getAllMetaData f m'
- , not (isSet v) || S.member v (getCurrentMetaData f m')
+ , not (isSet v) || S.member v (currentMetaDataValues f m')
]
where
m' = updateMetaData f v m
@@ -176,5 +212,4 @@ prop_metadata_serialize f v m = and
, deserialize (serialize m') == Just m'
]
where
- m' = removeemptyfields m
- removeemptyfields (MetaData x) = MetaData $ M.filter (not . S.null) x
+ m' = removeEmptyFields m
diff --git a/doc/design/metadata.mdwn b/doc/design/metadata.mdwn
index 40c085cdc..3e2d4bf08 100644
--- a/doc/design/metadata.mdwn
+++ b/doc/design/metadata.mdwn
@@ -145,20 +145,6 @@ a tag was removed:
1287290991.152124s tag +baz
1291237510.141453s tag -bar
-The end result is that tags foo and baz are set. This can be simplified:
-
- 1291237510.141453s tag +foo +baz -bar
-
-Note the reuse of the most recent timestamp in the simplified version,
-rather than putting in the timestamp when the simplification was done.
-This ensures that is some other repo is making changes, they won't get
-trampled over. For example:
-
- 1291237510.141453s tag +foo +baz -bar
- 1291239999.000000s tag +bar -foo
-
-Now tags bar and baz are set.
-
# efficient metadata lookup
Looking up metadata for filtering so far requires traversing all keys in
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 4e672f608..17d78c555 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -695,6 +695,23 @@ subdirectories).
# UTILITY COMMANDS
+* `metadata file [field=value field+=value field-=value ...]`
+
+ Each file can have any number of metadata fields attached to it,
+ which each in turn have any number of values. This sets metadata
+ for a file, or if run without any values, shows its current metadata.
+
+ To set a field's value, removing any old value(s), use field=value.
+
+ To add an additional value, use field+=value.
+
+ To remove a value, use field-=value.
+
+ For example, to set some tags on a file:
+
+ git annex metadata annexscreencast.ogv tag+=video tag+=screencast
+
+
* `migrate [path ...]`
Changes the specified annexed files to use the default key-value backend
diff --git a/doc/internals.mdwn b/doc/internals.mdwn
index 1cf0cf505..970e88ba0 100644
--- a/doc/internals.mdwn
+++ b/doc/internals.mdwn
@@ -146,6 +146,27 @@ Example:
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 blah blah
1287290767.478634s 26339d22-446b-11e0-9101-002170d25c55 foo=bar
+## `aaa/bbb/*.log.met`
+
+These log files are used to store arbitrary [[design/metadata]] about keys.
+Each key can have any number of metadata fields. Each field has a set of
+values.
+
+Lines are timestamped, and record when values are added (`field +value`),
+but also when values are removed (`field -value`). Removed values
+are retained in the log so that when merging an old line that sets a value
+that was later unset, the value is not accidentially added back.
+
+For example:
+
+ 1287290776.765152s tag +foo +bar author +joey
+ 1291237510.141453s tag -bar +baz
+
+The value can be completely arbitrary data, although it's typically
+reasonably short. If the value contains any whitespace
+(including \r or \r), it will be base64 encoded. Base64 encoded values
+are indicated by prefixing them with "!"
+
## `schedule.log`
Used to record scheduled events, such as periodic fscks.