From 57b2c5d0eefa883bd77a846af41c30e108c6aa9b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 26 Jul 2016 14:53:00 -0400 Subject: saner format for metadata --json metadata --json output format has changed, adding a inner json object named "fields" which contains only the fields and their values. This should be easier to parse than the old format, which mixed up metadata fields with other keys in the json object. Any consumers of the old format will need to be updated. This adds a dependency on unordered-containers for parsing MetaData from JSON, but it's a free dependency; aeson pulls in that library. --- CHANGELOG | 7 ++++++- Command/MetaData.hs | 40 ++++++++++++++++++++++++++++++++++++---- Types/MetaData.hs | 25 +++++++++++++++++++++++++ debian/control | 1 + doc/git-annex-metadata.mdwn | 10 ++++++++++ git-annex.cabal | 3 ++- 6 files changed, 80 insertions(+), 6 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 79e4dc1bd..45da171c2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,10 @@ -git-annex (6.20160620) UNRELEASED; urgency=medium +git-annex (6.20160726) UNRELEASED; urgency=medium + * metadata --json output format has changed, adding a inner json object + named "fields" which contains only the fields and their values. + This should be easier to parse than the old format, which mixed up + metadata fields with other keys in the json object. + Any consumers of the old format will need to be updated. * Added --branch option to copy, drop, fsck, get, metadata, mirror, move, and whereis commands. This option makes git-annex operate on files that are included in a specified branch (or other treeish). diff --git a/Command/MetaData.hs b/Command/MetaData.hs index 3123a63d0..66469f2fc 100644 --- a/Command/MetaData.hs +++ b/Command/MetaData.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2014 Joey Hess + - Copyright 2014-2016 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -10,9 +10,13 @@ module Command.MetaData where import Command import Annex.MetaData import Logs.MetaData +import Messages.JSON (ParsedJSON(..)) import qualified Data.Set as S +import qualified Data.Text as T +import qualified Data.ByteString.Lazy.UTF8 as BU import Data.Time.Clock.POSIX +import Data.Aeson cmd :: Command cmd = withGlobalOptions ([jsonOption] ++ annexedMatchingOptions) $ @@ -95,10 +99,38 @@ perform now o k = case getSet o of cleanup :: Key -> CommandCleanup cleanup k = do - l <- map unwrapmeta . fromMetaData <$> getCurrentMetaData k - maybeShowJSON (JSONObject l) - showLongNote $ unlines $ concatMap showmeta l + m <- getCurrentMetaData k + let Object o = toJSON (MetaDataFields m) + maybeShowJSON $ AesonObject o + showLongNote $ unlines $ concatMap showmeta $ + map unwrapmeta (fromMetaData m) return True where unwrapmeta (f, v) = (fromMetaField f, map fromMetaValue (S.toList v)) showmeta (f, vs) = map ((f ++ "=") ++) vs + +-- Metadata serialized to JSON in the field named "fields" of +-- a larger object. +newtype MetaDataFields = MetaDataFields MetaData + deriving (Show) + +instance ToJSON MetaDataFields where + toJSON (MetaDataFields m) = object [ (fieldsField, toJSON m) ] + +instance FromJSON MetaDataFields where + parseJSON (Object v) = do + f <- v .: fieldsField + case f of + Nothing -> return (MetaDataFields emptyMetaData) + Just v' -> MetaDataFields <$> parseJSON v' + parseJSON _ = fail "expected an object" + +fieldsField :: T.Text +fieldsField = T.pack "fields" + +parseJSONInput :: String -> Maybe (Either FilePath Key, MetaData) +parseJSONInput i = do + v <- decode (BU.fromString i) + case parsedAdded v of + Nothing -> return (parsedKeyfile v, emptyMetaData) + Just (MetaDataFields m) -> return (parsedKeyfile v, m) diff --git a/Types/MetaData.hs b/Types/MetaData.hs index 449548d53..a62dd7ed0 100644 --- a/Types/MetaData.hs +++ b/Types/MetaData.hs @@ -44,14 +44,32 @@ import Common import Utility.Base64 import Utility.QuickCheck +import qualified Data.Text as T import qualified Data.Set as S import qualified Data.Map as M +import qualified Data.HashMap.Strict as HM import Data.Char import qualified Data.CaseInsensitive as CI +import Data.Aeson newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue)) deriving (Show, Eq, Ord) +instance ToJSON MetaData where + toJSON (MetaData m) = object $ map go (M.toList m) + where + go (MetaField f, s) = (T.pack (CI.original f), toJSON s) + +instance FromJSON MetaData where + parseJSON (Object o) = do + l <- HM.toList <$> parseJSON (Object o) + MetaData . M.fromList <$> mapM go l + where + go (t, l) = case mkMetaField (T.unpack t) of + Left e -> fail e + Right f -> (,) <$> pure f <*> parseJSON l + parseJSON _ = fail "expected an object" + {- A metadata value can be currently be set (True), or may have been - set before and we're remembering it no longer is (False). -} newtype CurrentlySet = CurrentlySet Bool @@ -64,6 +82,13 @@ newtype MetaField = MetaField (CI.CI String) data MetaValue = MetaValue CurrentlySet String deriving (Read, Show) +instance ToJSON MetaValue where + toJSON (MetaValue _ v) = toJSON v + +instance FromJSON MetaValue where + parseJSON (String v) = return $ MetaValue (CurrentlySet True) (T.unpack v) + parseJSON _ = fail "expected a string" + {- Metadata values compare and order the same whether currently set or not. -} instance Eq MetaValue where MetaValue _ a == MetaValue _ b = a == b diff --git a/debian/control b/debian/control index f9dfec92e..30c4274ce 100644 --- a/debian/control +++ b/debian/control @@ -25,6 +25,7 @@ Build-Depends: libghc-uuid-dev, libghc-json-dev, libghc-aeson-dev, + libghc-unordered-containers-dev, libghc-ifelse-dev, libghc-bloomfilter-dev, libghc-edit-distance-dev, diff --git a/doc/git-annex-metadata.mdwn b/doc/git-annex-metadata.mdwn index fe344ff5e..b4e790080 100644 --- a/doc/git-annex-metadata.mdwn +++ b/doc/git-annex-metadata.mdwn @@ -71,6 +71,16 @@ When run without any -s or -t parameters, displays the current metadata. Enable JSON output. This is intended to be parsed by programs that use git-annex. Each line of output is a JSON object. + The format of the JSON objects changed in git-annex version 6.20160726. + + Example of the new format: + + {"command":"metadata","file":"foo","key":"...","fields":{"author":["bar"],...},"note":"...","success":true} + + Example of the old format, which lacks the inner fields object: + + {"command":"metadata","file":"foo","key":"...","author":["bar"],...,"note":"...","success":true} + * `--all` Specify instead of a file to get/set metadata on all known keys. diff --git a/git-annex.cabal b/git-annex.cabal index d0ee98849..f9033cc38 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -323,7 +323,7 @@ Executable git-annex Build-Depends: base (>= 4.5 && < 5.0), optparse-applicative (>= 0.11.0), - containers (>= 0.5.0.0), + containers (>= 0.5.0.0), exceptions (>= 0.6), QuickCheck (>= 2.1), stm (>= 2.3), @@ -338,6 +338,7 @@ Executable git-annex time, old-locale, esqueleto, persistent-sqlite, persistent (<2.5), persistent-template, aeson, + unordered-containers, feed, regex-tdfa CC-Options: -Wall -- cgit v1.2.3