summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-02-23 00:08:29 -0400
committerGravatar Joey Hess <joey@kitenet.net>2014-02-23 00:08:29 -0400
commit704fa2fa789b453d354f2f375b0b42481c8990f6 (patch)
treeafb28b2548c067be546b22fa6b04fa8b5d76c3fd
parente0ae2f6bba897367ae23bfb1a6584ebd4843452e (diff)
annex.genmetadata can be set to make git-annex automatically set metadata (year and month) when adding files
-rw-r--r--Annex/MetaData.hs51
-rw-r--r--Annex/View.hs3
-rw-r--r--Command/Add.hs25
-rw-r--r--Command/MetaData.hs3
-rw-r--r--Command/View.hs1
-rw-r--r--Logs/MetaData.hs4
-rw-r--r--Types/GitConfig.hs2
-rw-r--r--Types/MetaData.hs20
-rw-r--r--debian/changelog2
-rw-r--r--doc/design/metadata.mdwn18
-rw-r--r--doc/git-annex.mdwn6
-rw-r--r--doc/metadata.mdwn41
-rw-r--r--doc/tips/metadata_driven_views.mdwn2
13 files changed, 135 insertions, 43 deletions
diff --git a/Annex/MetaData.hs b/Annex/MetaData.hs
new file mode 100644
index 000000000..ef235b51f
--- /dev/null
+++ b/Annex/MetaData.hs
@@ -0,0 +1,51 @@
+{- git-annex metadata
+ -
+ - Copyright 2014 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.MetaData where
+
+import Common.Annex
+import qualified Annex
+import Types.MetaData
+import Logs.MetaData
+
+import qualified Data.Set as S
+import qualified Data.Map as M
+import Data.Time.Calendar
+import Data.Time.Clock
+import Data.Time.Clock.POSIX
+
+tagMetaField :: MetaField
+tagMetaField = MetaField "tag"
+
+yearMetaField :: MetaField
+yearMetaField = MetaField "year"
+
+monthMetaField :: MetaField
+monthMetaField = MetaField "month"
+
+{- Generates metadata for a file that has just been ingested into the
+ - annex. Passed the FileStatus of the content file.
+ -
+ - Does not overwrite any existing metadata values for the key.
+ -}
+genMetaData :: Key -> FileStatus -> Annex ()
+genMetaData key status = whenM (annexGenMetaData <$> Annex.getGitConfig) $ do
+ metadata <- getCurrentMetaData key
+ let metadata' = genMetaData' status metadata
+ unless (metadata' == emptyMetaData) $
+ addMetaData key metadata'
+
+genMetaData' :: FileStatus -> MetaData -> MetaData
+genMetaData' status old = MetaData $ M.fromList $ filter isnew
+ [ (yearMetaField, S.singleton $ toMetaValue $ show y)
+ , (monthMetaField, S.singleton $ toMetaValue $ show m)
+ ]
+ where
+ isnew (f, _) = S.null (currentMetaDataValues f old)
+ (y, m, _d) = toGregorian $ utctDay $
+ posixSecondsToUTCTime $ realToFrac $
+ modificationTime status
diff --git a/Annex/View.hs b/Annex/View.hs
index 69d064753..9d1a763e2 100644
--- a/Annex/View.hs
+++ b/Annex/View.hs
@@ -300,8 +300,7 @@ applyView' mkviewedfile getfilemetadata view = do
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
go uh hasher f (Just (k, _)) = do
metadata <- getCurrentMetaData k
- let dirmetadata = getfilemetadata f
- let metadata' = unionMetaData dirmetadata metadata
+ let metadata' = getfilemetadata f `unionMetaData` metadata
forM_ (genviewedfiles f metadata') $ \fv -> do
stagesymlink uh hasher fv =<< inRepo (gitAnnexLink fv k)
go uh hasher f Nothing
diff --git a/Command/Add.hs b/Command/Add.hs
index d1dcb6025..0906ae531 100644
--- a/Command/Add.hs
+++ b/Command/Add.hs
@@ -19,6 +19,7 @@ import Annex.Content
import Annex.Content.Direct
import Annex.Perms
import Annex.Link
+import Annex.MetaData
import qualified Annex
import qualified Annex.Queue
#ifdef WITH_CLIBS
@@ -145,26 +146,32 @@ ingest Nothing = return (Nothing, Nothing)
ingest (Just source) = do
backend <- chooseBackend $ keyFilename source
k <- genKey source backend
- cache <- liftIO $ genInodeCache $ contentLocation source
- case (cache, inodeCache source) of
- (_, Nothing) -> go k cache
- (Just newc, Just c) | compareStrong c newc -> go k cache
+ ms <- liftIO $ catchMaybeIO $ getFileStatus $ contentLocation source
+ let mcache = toInodeCache =<< ms
+ case (mcache, inodeCache source) of
+ (_, Nothing) -> go k mcache ms
+ (Just newc, Just c) | compareStrong c newc -> go k mcache ms
_ -> failure "changed while it was being added"
where
- go k cache = ifM isDirect ( godirect k cache , goindirect k cache )
+ go k mcache ms = ifM isDirect
+ ( godirect k mcache ms
+ , goindirect k mcache ms
+ )
- goindirect (Just (key, _)) mcache = do
+ goindirect (Just (key, _)) mcache ms = do
catchAnnex (moveAnnex key $ contentLocation source)
(undo (keyFilename source) key)
+ maybe noop (genMetaData key) ms
liftIO $ nukeFile $ keyFilename source
return $ (Just key, mcache)
- goindirect Nothing _ = failure "failed to generate a key"
+ goindirect _ _ _ = failure "failed to generate a key"
- godirect (Just (key, _)) (Just cache) = do
+ godirect (Just (key, _)) (Just cache) ms = do
addInodeCache key cache
+ maybe noop (genMetaData key) ms
finishIngestDirect key source
return $ (Just key, Just cache)
- godirect _ _ = failure "failed to generate a key"
+ godirect _ _ _ = failure "failed to generate a key"
failure msg = do
warning $ keyFilename source ++ " " ++ msg
diff --git a/Command/MetaData.hs b/Command/MetaData.hs
index 6112dd095..651cb4944 100644
--- a/Command/MetaData.hs
+++ b/Command/MetaData.hs
@@ -10,6 +10,7 @@ module Command.MetaData where
import Common.Annex
import qualified Annex
import Command
+import Annex.MetaData
import Logs.MetaData
import Types.MetaData
@@ -55,7 +56,7 @@ perform :: POSIXTime -> [ModMeta] -> Key -> CommandPerform
perform _ [] k = next $ cleanup k
perform now ms k = do
oldm <- getCurrentMetaData k
- let m = foldl' unionMetaData newMetaData $ map (modMeta oldm) ms
+ let m = foldl' unionMetaData emptyMetaData $ map (modMeta oldm) ms
addMetaData' k m now
next $ cleanup k
diff --git a/Command/View.hs b/Command/View.hs
index f123e3812..e5182e852 100644
--- a/Command/View.hs
+++ b/Command/View.hs
@@ -14,6 +14,7 @@ import qualified Git.Command
import qualified Git.Ref
import qualified Git.Branch
import Types.MetaData
+import Annex.MetaData
import Types.View
import Annex.View
import Logs.View
diff --git a/Logs/MetaData.hs b/Logs/MetaData.hs
index 77c1b56a5..63314bcef 100644
--- a/Logs/MetaData.hs
+++ b/Logs/MetaData.hs
@@ -55,7 +55,7 @@ getMetaData = readLog . metaDataLogFile
getCurrentMetaData :: Key -> Annex MetaData
getCurrentMetaData = currentMetaData . collect <$$> getMetaData
where
- collect = foldl' unionMetaData newMetaData . map value . S.toAscList
+ collect = foldl' unionMetaData emptyMetaData . map value . S.toAscList
{- Adds in some metadata, which can override existing values, or unset
- them, but otherwise leaves any existing metadata as-is. -}
@@ -129,7 +129,7 @@ simplifyLog s = case sl of
go c _ [] = c
go c newer (l:ls)
- | unique == newMetaData = go c newer ls
+ | unique == emptyMetaData = go c newer ls
| otherwise = go (l { value = unique } : c)
(unionMetaData unique newer) ls
where
diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs
index ab3dbd7b9..65984a108 100644
--- a/Types/GitConfig.hs
+++ b/Types/GitConfig.hs
@@ -49,6 +49,7 @@ data GitConfig = GitConfig
, annexAutoUpgrade :: AutoUpgrade
, annexExpireUnused :: Maybe (Maybe Duration)
, annexSecureEraseCommand :: Maybe String
+ , annexGenMetaData :: Bool
, coreSymlinks :: Bool
, gcryptId :: Maybe String
}
@@ -81,6 +82,7 @@ extractGitConfig r = GitConfig
, annexExpireUnused = maybe Nothing Just . parseDuration
<$> getmaybe (annex "expireunused")
, annexSecureEraseCommand = getmaybe (annex "secure-erase-command")
+ , annexGenMetaData = getbool (annex "genmetadata") False
, coreSymlinks = getbool "core.symlinks" True
, gcryptId = getmaybe "core.gcrypt-id"
}
diff --git a/Types/MetaData.hs b/Types/MetaData.hs
index 617c122a6..b941cb59b 100644
--- a/Types/MetaData.hs
+++ b/Types/MetaData.hs
@@ -17,7 +17,6 @@ module Types.MetaData (
MetaSerializable,
toMetaField,
mkMetaField,
- tagMetaField,
fromMetaField,
toMetaValue,
mkMetaValue,
@@ -25,7 +24,7 @@ module Types.MetaData (
unsetMetaData,
fromMetaValue,
fromMetaData,
- newMetaData,
+ emptyMetaData,
updateMetaData,
unionMetaData,
differenceMetaData,
@@ -81,7 +80,7 @@ instance MetaSerializable MetaData where
serialize (MetaData m) = unwords $ concatMap go $ M.toList m
where
go (f, vs) = serialize f : map serialize (S.toList vs)
- deserialize = Just . getfield newMetaData . words
+ deserialize = Just . getfield emptyMetaData . words
where
getfield m [] = m
getfield m (w:ws) = maybe m (getvalues m ws) (deserialize w)
@@ -152,8 +151,8 @@ fromMetaValue (MetaValue _ f) = f
fromMetaData :: MetaData -> [(MetaField, S.Set MetaValue)]
fromMetaData (MetaData m) = M.toList m
-newMetaData :: MetaData
-newMetaData = MetaData M.empty
+emptyMetaData :: MetaData
+emptyMetaData = MetaData M.empty
{- Can be used to set a value, or to unset it, depending on whether
- the MetaValue has CurrentlySet or not. -}
@@ -202,10 +201,10 @@ data ModMeta
- Note that the new MetaData does not include all the
- values set in the input metadata. It only contains changed values. -}
modMeta :: MetaData -> ModMeta -> MetaData
-modMeta _ (AddMeta f v) = updateMetaData f v newMetaData
-modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) newMetaData
+modMeta _ (AddMeta f v) = updateMetaData f v emptyMetaData
+modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) emptyMetaData
modMeta m (SetMeta f v) = updateMetaData f v $
- foldr (updateMetaData f) newMetaData $
+ foldr (updateMetaData f) emptyMetaData $
map unsetMetaValue $ S.toList $ currentMetaDataValues f m
{- Parses field=value, field+=value, field-=value -}
@@ -233,9 +232,6 @@ mkMetaField f = maybe (Left $ badField f) Right (toMetaField f)
badField :: String -> String
badField f = "Illegal metadata field name, \"" ++ f ++ "\""
-tagMetaField :: MetaField
-tagMetaField = MetaField "tag"
-
{- Avoid putting too many fields in the map; extremely large maps make
- the seriaization test slow due to the sheer amount of data.
- It's unlikely that more than 100 fields of metadata will be used. -}
@@ -254,7 +250,7 @@ prop_metadata_sane :: MetaData -> MetaField -> MetaValue -> Bool
prop_metadata_sane m f v = and
[ S.member v $ metaDataValues f m'
, not (isSet v) || S.member v (currentMetaDataValues f m')
- , differenceMetaData m' newMetaData == m'
+ , differenceMetaData m' emptyMetaData == m'
]
where
m' = updateMetaData f v m
diff --git a/debian/changelog b/debian/changelog
index 4d5589ed0..edb203402 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -9,6 +9,8 @@ git-annex (5.20140222) UNRELEASED; urgency=medium
directory hierarchy in a view.
For example `git annex view tag=* podcasts/=*` makes a view in the form
tag/showname.
+ * annex.genmetadata can be set to make git-annex automatically set
+ metadata (year and month) when adding files.
-- Joey Hess <joeyh@debian.org> Fri, 21 Feb 2014 13:03:04 -0400
diff --git a/doc/design/metadata.mdwn b/doc/design/metadata.mdwn
index a648f5fe6..992ae1ca7 100644
--- a/doc/design/metadata.mdwn
+++ b/doc/design/metadata.mdwn
@@ -29,7 +29,7 @@ directories nest.
relevant metadata from the files.
TODO: It's not clear that
removing a file should nuke all the metadata used to filter it into the
- branch (especially if it's derived metadata like the year).
+ branch
Currently, only metadata used for visible subdirs is added and removed
this way.
Also, this is not usable in direct mode because deleting the
@@ -56,19 +56,7 @@ For example, by examining MP3 metadata.
Also auto add metadata when adding files to view branches. See below.
-## derived metadata
-
-This is probably not stored anywhere. It's computed on demand by a pure
-function from the other metadata.
-(Should be a general mechanism for this. (It probably generalizes to
-sql queries if we want to go that far.))
-
-### data metadata
-
-TODO From the ctime, some additional
-metadata is derived, at least year=yyyy and probably also month, etc.
-
-### directory hierarchy metadata
+## directory hierarchy metadata
From the original filename used in the master branch, when
constructing a view, generate fields. For example foo/bar/baz.mp3
@@ -87,8 +75,6 @@ those filenames to derive the same metadata, unless there is persistent
storage. Luckily, the filenames used in the views currently include the
subdirs.
-**done**!
-
# other uses for metadata
Uses are not limited to view branches.
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 3c233f378..ab158a952 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -1279,6 +1279,12 @@ Here are all the supported configuration settings.
Note that setting numcopies to 0 is very unsafe.
+* `annex.genmetadata`
+
+ Set this to `true` to make git-annex automatically generate some metadata
+ when adding files to the repository. In particular, it stores
+ year and month metadata, from the file's modification date.
+
* `annex.queuesize`
git-annex builds a queue of git commands, in order to combine similar
diff --git a/doc/metadata.mdwn b/doc/metadata.mdwn
new file mode 100644
index 000000000..414a91053
--- /dev/null
+++ b/doc/metadata.mdwn
@@ -0,0 +1,41 @@
+git-annex allows you to store arbitrary metadata about files stored in the
+git-annex repository. The metadata is stored in the `git-annex` branch, and
+so is automatically kept in sync with the rest of git-annex's state, such
+as [[location_tracking]] information.
+
+Some of the things you can do with metadata include:
+
+* Using `git annex metadata file` to show all
+ the metadata associated with a file.
+* [[tips/metadata_driven_views]]
+* Limiting the files git-annex commands act on to those with
+ or without particular metadata.
+ For example `git annex find --metadata tag=foo --or --metadata tag=bar`
+* Using it in [[preferred_content]] expressions.
+ For example "tag=important or not author=me"
+
+Each file (actually the underlying key) can have any number of metadata
+fields, which each can have any number of values. For example, to tag
+files, the `tag` field is typically used, with values set to each tag that
+applies to the file.
+
+The field names are freeform (but cannot include spaces). The metadata
+values can contain absolutely anything you like -- but you're recommended
+to keep it simple and reasonably short.
+
+Here are some recommended metadata fields to use:
+
+* `tag` - With each tag being a different value.
+* `year`, `month` - When this particular version of the file came into
+ being.
+
+To make git-annex automatically set the year and month when adding files,
+run `git config annex.genmetadata true`
+
+git-annex's metadata can be updated in a distributed fashion. For example,
+two users, each with their own clone of a repository, can set and unset
+metadata at the same time, even for the same field of the same file.
+When they push their changes, `git annex merge` will combine their
+metadata changes in a consistent and (probably) intuitive way.
+
+See [[the metadata design page|design/metadata]] for more details.
diff --git a/doc/tips/metadata_driven_views.mdwn b/doc/tips/metadata_driven_views.mdwn
index e24bf29ae..0af05bd3f 100644
--- a/doc/tips/metadata_driven_views.mdwn
+++ b/doc/tips/metadata_driven_views.mdwn
@@ -1,5 +1,5 @@
git-annex now has support for storing
-[[arbitrary metadata|design/metadata]] about annexed files. For example, this can be
+[[arbitrary metadata|metadata]] about annexed files. For example, this can be
used to tag files, to record the author of a file, etc. The metadata is
synced around between repositories with the other information git-annex
keeps track of.