diff options
author | Joey Hess <joey@kitenet.net> | 2014-02-23 00:08:29 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2014-02-23 00:08:29 -0400 |
commit | 704fa2fa789b453d354f2f375b0b42481c8990f6 (patch) | |
tree | afb28b2548c067be546b22fa6b04fa8b5d76c3fd | |
parent | e0ae2f6bba897367ae23bfb1a6584ebd4843452e (diff) |
annex.genmetadata can be set to make git-annex automatically set metadata (year and month) when adding files
-rw-r--r-- | Annex/MetaData.hs | 51 | ||||
-rw-r--r-- | Annex/View.hs | 3 | ||||
-rw-r--r-- | Command/Add.hs | 25 | ||||
-rw-r--r-- | Command/MetaData.hs | 3 | ||||
-rw-r--r-- | Command/View.hs | 1 | ||||
-rw-r--r-- | Logs/MetaData.hs | 4 | ||||
-rw-r--r-- | Types/GitConfig.hs | 2 | ||||
-rw-r--r-- | Types/MetaData.hs | 20 | ||||
-rw-r--r-- | debian/changelog | 2 | ||||
-rw-r--r-- | doc/design/metadata.mdwn | 18 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 6 | ||||
-rw-r--r-- | doc/metadata.mdwn | 41 | ||||
-rw-r--r-- | doc/tips/metadata_driven_views.mdwn | 2 |
13 files changed, 135 insertions, 43 deletions
diff --git a/Annex/MetaData.hs b/Annex/MetaData.hs new file mode 100644 index 000000000..ef235b51f --- /dev/null +++ b/Annex/MetaData.hs @@ -0,0 +1,51 @@ +{- git-annex metadata + - + - Copyright 2014 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.MetaData where + +import Common.Annex +import qualified Annex +import Types.MetaData +import Logs.MetaData + +import qualified Data.Set as S +import qualified Data.Map as M +import Data.Time.Calendar +import Data.Time.Clock +import Data.Time.Clock.POSIX + +tagMetaField :: MetaField +tagMetaField = MetaField "tag" + +yearMetaField :: MetaField +yearMetaField = MetaField "year" + +monthMetaField :: MetaField +monthMetaField = MetaField "month" + +{- Generates metadata for a file that has just been ingested into the + - annex. Passed the FileStatus of the content file. + - + - Does not overwrite any existing metadata values for the key. + -} +genMetaData :: Key -> FileStatus -> Annex () +genMetaData key status = whenM (annexGenMetaData <$> Annex.getGitConfig) $ do + metadata <- getCurrentMetaData key + let metadata' = genMetaData' status metadata + unless (metadata' == emptyMetaData) $ + addMetaData key metadata' + +genMetaData' :: FileStatus -> MetaData -> MetaData +genMetaData' status old = MetaData $ M.fromList $ filter isnew + [ (yearMetaField, S.singleton $ toMetaValue $ show y) + , (monthMetaField, S.singleton $ toMetaValue $ show m) + ] + where + isnew (f, _) = S.null (currentMetaDataValues f old) + (y, m, _d) = toGregorian $ utctDay $ + posixSecondsToUTCTime $ realToFrac $ + modificationTime status diff --git a/Annex/View.hs b/Annex/View.hs index 69d064753..9d1a763e2 100644 --- a/Annex/View.hs +++ b/Annex/View.hs @@ -300,8 +300,7 @@ applyView' mkviewedfile getfilemetadata view = do genviewedfiles = viewedFiles view mkviewedfile -- enables memoization go uh hasher f (Just (k, _)) = do metadata <- getCurrentMetaData k - let dirmetadata = getfilemetadata f - let metadata' = unionMetaData dirmetadata metadata + let metadata' = getfilemetadata f `unionMetaData` metadata forM_ (genviewedfiles f metadata') $ \fv -> do stagesymlink uh hasher fv =<< inRepo (gitAnnexLink fv k) go uh hasher f Nothing diff --git a/Command/Add.hs b/Command/Add.hs index d1dcb6025..0906ae531 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -19,6 +19,7 @@ import Annex.Content import Annex.Content.Direct import Annex.Perms import Annex.Link +import Annex.MetaData import qualified Annex import qualified Annex.Queue #ifdef WITH_CLIBS @@ -145,26 +146,32 @@ ingest Nothing = return (Nothing, Nothing) ingest (Just source) = do backend <- chooseBackend $ keyFilename source k <- genKey source backend - cache <- liftIO $ genInodeCache $ contentLocation source - case (cache, inodeCache source) of - (_, Nothing) -> go k cache - (Just newc, Just c) | compareStrong c newc -> go k cache + ms <- liftIO $ catchMaybeIO $ getFileStatus $ contentLocation source + let mcache = toInodeCache =<< ms + case (mcache, inodeCache source) of + (_, Nothing) -> go k mcache ms + (Just newc, Just c) | compareStrong c newc -> go k mcache ms _ -> failure "changed while it was being added" where - go k cache = ifM isDirect ( godirect k cache , goindirect k cache ) + go k mcache ms = ifM isDirect + ( godirect k mcache ms + , goindirect k mcache ms + ) - goindirect (Just (key, _)) mcache = do + goindirect (Just (key, _)) mcache ms = do catchAnnex (moveAnnex key $ contentLocation source) (undo (keyFilename source) key) + maybe noop (genMetaData key) ms liftIO $ nukeFile $ keyFilename source return $ (Just key, mcache) - goindirect Nothing _ = failure "failed to generate a key" + goindirect _ _ _ = failure "failed to generate a key" - godirect (Just (key, _)) (Just cache) = do + godirect (Just (key, _)) (Just cache) ms = do addInodeCache key cache + maybe noop (genMetaData key) ms finishIngestDirect key source return $ (Just key, Just cache) - godirect _ _ = failure "failed to generate a key" + godirect _ _ _ = failure "failed to generate a key" failure msg = do warning $ keyFilename source ++ " " ++ msg diff --git a/Command/MetaData.hs b/Command/MetaData.hs index 6112dd095..651cb4944 100644 --- a/Command/MetaData.hs +++ b/Command/MetaData.hs @@ -10,6 +10,7 @@ module Command.MetaData where import Common.Annex import qualified Annex import Command +import Annex.MetaData import Logs.MetaData import Types.MetaData @@ -55,7 +56,7 @@ perform :: POSIXTime -> [ModMeta] -> Key -> CommandPerform perform _ [] k = next $ cleanup k perform now ms k = do oldm <- getCurrentMetaData k - let m = foldl' unionMetaData newMetaData $ map (modMeta oldm) ms + let m = foldl' unionMetaData emptyMetaData $ map (modMeta oldm) ms addMetaData' k m now next $ cleanup k diff --git a/Command/View.hs b/Command/View.hs index f123e3812..e5182e852 100644 --- a/Command/View.hs +++ b/Command/View.hs @@ -14,6 +14,7 @@ import qualified Git.Command import qualified Git.Ref import qualified Git.Branch import Types.MetaData +import Annex.MetaData import Types.View import Annex.View import Logs.View diff --git a/Logs/MetaData.hs b/Logs/MetaData.hs index 77c1b56a5..63314bcef 100644 --- a/Logs/MetaData.hs +++ b/Logs/MetaData.hs @@ -55,7 +55,7 @@ getMetaData = readLog . metaDataLogFile getCurrentMetaData :: Key -> Annex MetaData getCurrentMetaData = currentMetaData . collect <$$> getMetaData where - collect = foldl' unionMetaData newMetaData . map value . S.toAscList + collect = foldl' unionMetaData emptyMetaData . map value . S.toAscList {- Adds in some metadata, which can override existing values, or unset - them, but otherwise leaves any existing metadata as-is. -} @@ -129,7 +129,7 @@ simplifyLog s = case sl of go c _ [] = c go c newer (l:ls) - | unique == newMetaData = go c newer ls + | unique == emptyMetaData = go c newer ls | otherwise = go (l { value = unique } : c) (unionMetaData unique newer) ls where diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index ab3dbd7b9..65984a108 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -49,6 +49,7 @@ data GitConfig = GitConfig , annexAutoUpgrade :: AutoUpgrade , annexExpireUnused :: Maybe (Maybe Duration) , annexSecureEraseCommand :: Maybe String + , annexGenMetaData :: Bool , coreSymlinks :: Bool , gcryptId :: Maybe String } @@ -81,6 +82,7 @@ extractGitConfig r = GitConfig , annexExpireUnused = maybe Nothing Just . parseDuration <$> getmaybe (annex "expireunused") , annexSecureEraseCommand = getmaybe (annex "secure-erase-command") + , annexGenMetaData = getbool (annex "genmetadata") False , coreSymlinks = getbool "core.symlinks" True , gcryptId = getmaybe "core.gcrypt-id" } diff --git a/Types/MetaData.hs b/Types/MetaData.hs index 617c122a6..b941cb59b 100644 --- a/Types/MetaData.hs +++ b/Types/MetaData.hs @@ -17,7 +17,6 @@ module Types.MetaData ( MetaSerializable, toMetaField, mkMetaField, - tagMetaField, fromMetaField, toMetaValue, mkMetaValue, @@ -25,7 +24,7 @@ module Types.MetaData ( unsetMetaData, fromMetaValue, fromMetaData, - newMetaData, + emptyMetaData, updateMetaData, unionMetaData, differenceMetaData, @@ -81,7 +80,7 @@ instance MetaSerializable MetaData where serialize (MetaData m) = unwords $ concatMap go $ M.toList m where go (f, vs) = serialize f : map serialize (S.toList vs) - deserialize = Just . getfield newMetaData . words + deserialize = Just . getfield emptyMetaData . words where getfield m [] = m getfield m (w:ws) = maybe m (getvalues m ws) (deserialize w) @@ -152,8 +151,8 @@ fromMetaValue (MetaValue _ f) = f fromMetaData :: MetaData -> [(MetaField, S.Set MetaValue)] fromMetaData (MetaData m) = M.toList m -newMetaData :: MetaData -newMetaData = MetaData M.empty +emptyMetaData :: MetaData +emptyMetaData = MetaData M.empty {- Can be used to set a value, or to unset it, depending on whether - the MetaValue has CurrentlySet or not. -} @@ -202,10 +201,10 @@ data ModMeta - Note that the new MetaData does not include all the - values set in the input metadata. It only contains changed values. -} modMeta :: MetaData -> ModMeta -> MetaData -modMeta _ (AddMeta f v) = updateMetaData f v newMetaData -modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) newMetaData +modMeta _ (AddMeta f v) = updateMetaData f v emptyMetaData +modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) emptyMetaData modMeta m (SetMeta f v) = updateMetaData f v $ - foldr (updateMetaData f) newMetaData $ + foldr (updateMetaData f) emptyMetaData $ map unsetMetaValue $ S.toList $ currentMetaDataValues f m {- Parses field=value, field+=value, field-=value -} @@ -233,9 +232,6 @@ mkMetaField f = maybe (Left $ badField f) Right (toMetaField f) badField :: String -> String badField f = "Illegal metadata field name, \"" ++ f ++ "\"" -tagMetaField :: MetaField -tagMetaField = MetaField "tag" - {- Avoid putting too many fields in the map; extremely large maps make - the seriaization test slow due to the sheer amount of data. - It's unlikely that more than 100 fields of metadata will be used. -} @@ -254,7 +250,7 @@ prop_metadata_sane :: MetaData -> MetaField -> MetaValue -> Bool prop_metadata_sane m f v = and [ S.member v $ metaDataValues f m' , not (isSet v) || S.member v (currentMetaDataValues f m') - , differenceMetaData m' newMetaData == m' + , differenceMetaData m' emptyMetaData == m' ] where m' = updateMetaData f v m diff --git a/debian/changelog b/debian/changelog index 4d5589ed0..edb203402 100644 --- a/debian/changelog +++ b/debian/changelog @@ -9,6 +9,8 @@ git-annex (5.20140222) UNRELEASED; urgency=medium directory hierarchy in a view. For example `git annex view tag=* podcasts/=*` makes a view in the form tag/showname. + * annex.genmetadata can be set to make git-annex automatically set + metadata (year and month) when adding files. -- Joey Hess <joeyh@debian.org> Fri, 21 Feb 2014 13:03:04 -0400 diff --git a/doc/design/metadata.mdwn b/doc/design/metadata.mdwn index a648f5fe6..992ae1ca7 100644 --- a/doc/design/metadata.mdwn +++ b/doc/design/metadata.mdwn @@ -29,7 +29,7 @@ directories nest. relevant metadata from the files. TODO: It's not clear that removing a file should nuke all the metadata used to filter it into the - branch (especially if it's derived metadata like the year). + branch Currently, only metadata used for visible subdirs is added and removed this way. Also, this is not usable in direct mode because deleting the @@ -56,19 +56,7 @@ For example, by examining MP3 metadata. Also auto add metadata when adding files to view branches. See below. -## derived metadata - -This is probably not stored anywhere. It's computed on demand by a pure -function from the other metadata. -(Should be a general mechanism for this. (It probably generalizes to -sql queries if we want to go that far.)) - -### data metadata - -TODO From the ctime, some additional -metadata is derived, at least year=yyyy and probably also month, etc. - -### directory hierarchy metadata +## directory hierarchy metadata From the original filename used in the master branch, when constructing a view, generate fields. For example foo/bar/baz.mp3 @@ -87,8 +75,6 @@ those filenames to derive the same metadata, unless there is persistent storage. Luckily, the filenames used in the views currently include the subdirs. -**done**! - # other uses for metadata Uses are not limited to view branches. diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 3c233f378..ab158a952 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1279,6 +1279,12 @@ Here are all the supported configuration settings. Note that setting numcopies to 0 is very unsafe. +* `annex.genmetadata` + + Set this to `true` to make git-annex automatically generate some metadata + when adding files to the repository. In particular, it stores + year and month metadata, from the file's modification date. + * `annex.queuesize` git-annex builds a queue of git commands, in order to combine similar diff --git a/doc/metadata.mdwn b/doc/metadata.mdwn new file mode 100644 index 000000000..414a91053 --- /dev/null +++ b/doc/metadata.mdwn @@ -0,0 +1,41 @@ +git-annex allows you to store arbitrary metadata about files stored in the +git-annex repository. The metadata is stored in the `git-annex` branch, and +so is automatically kept in sync with the rest of git-annex's state, such +as [[location_tracking]] information. + +Some of the things you can do with metadata include: + +* Using `git annex metadata file` to show all + the metadata associated with a file. +* [[tips/metadata_driven_views]] +* Limiting the files git-annex commands act on to those with + or without particular metadata. + For example `git annex find --metadata tag=foo --or --metadata tag=bar` +* Using it in [[preferred_content]] expressions. + For example "tag=important or not author=me" + +Each file (actually the underlying key) can have any number of metadata +fields, which each can have any number of values. For example, to tag +files, the `tag` field is typically used, with values set to each tag that +applies to the file. + +The field names are freeform (but cannot include spaces). The metadata +values can contain absolutely anything you like -- but you're recommended +to keep it simple and reasonably short. + +Here are some recommended metadata fields to use: + +* `tag` - With each tag being a different value. +* `year`, `month` - When this particular version of the file came into + being. + +To make git-annex automatically set the year and month when adding files, +run `git config annex.genmetadata true` + +git-annex's metadata can be updated in a distributed fashion. For example, +two users, each with their own clone of a repository, can set and unset +metadata at the same time, even for the same field of the same file. +When they push their changes, `git annex merge` will combine their +metadata changes in a consistent and (probably) intuitive way. + +See [[the metadata design page|design/metadata]] for more details. diff --git a/doc/tips/metadata_driven_views.mdwn b/doc/tips/metadata_driven_views.mdwn index e24bf29ae..0af05bd3f 100644 --- a/doc/tips/metadata_driven_views.mdwn +++ b/doc/tips/metadata_driven_views.mdwn @@ -1,5 +1,5 @@ git-annex now has support for storing -[[arbitrary metadata|design/metadata]] about annexed files. For example, this can be +[[arbitrary metadata|metadata]] about annexed files. For example, this can be used to tag files, to record the author of a file, etc. The metadata is synced around between repositories with the other information git-annex keeps track of. |