From 66c4abb748f3982ef334a3320e49bfc6647e87d9 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 2 Mar 2014 18:01:07 -0400 Subject: pre-commit-annex hook script to automatically extract metadata from lots of types of files Using the extract(1) program to do the heavy lifting. Decided to make git-annex run pre-commit-annex when committing. Since git-annex pre-commit also runs it, it'll be run when git commit is run too, via the pre-commit hook. This basically gives back the pre-commit hook that git-annex took away. The implementation avoids repeatedly looking for the hook script when the assistant is running and committing repeatedly; only checks if the hook is available once. To make the script simpler, made git-annex metadata -s field?=value only set a field when it's not already got a value. This commit was sponsored by bak. --- Annex.hs | 3 ++ Annex/Hook.hs | 36 +++++++++++-- Command/PreCommit.hs | 9 +++- Command/Sync.hs | 2 + Git/Hook.hs | 4 ++ Types/MetaData.hs | 7 ++- debian/changelog | 3 ++ doc/design/metadata.mdwn | 9 +--- doc/git-annex.mdwn | 7 +++ doc/metadata.mdwn | 3 +- doc/tips/automatically_adding_metadata.mdwn | 24 +++++++++ .../automatically_adding_metadata/pre-commit-annex | 61 ++++++++++++++++++++++ 12 files changed, 152 insertions(+), 16 deletions(-) create mode 100644 doc/tips/automatically_adding_metadata.mdwn create mode 100755 doc/tips/automatically_adding_metadata/pre-commit-annex diff --git a/Annex.hs b/Annex.hs index e3bd95c33..b427efd59 100644 --- a/Annex.hs +++ b/Annex.hs @@ -44,6 +44,7 @@ import Git.CatFile import Git.CheckAttr import Git.CheckIgnore import Git.SharedRepository +import qualified Git.Hook import qualified Git.Queue import Types.Key import Types.Backend @@ -118,6 +119,7 @@ data AnnexState = AnnexState , errcounter :: Integer , unusedkeys :: Maybe (S.Set Key) , quviversion :: Maybe QuviVersion + , existinghooks :: M.Map Git.Hook.Hook Bool } newState :: GitConfig -> Git.Repo -> AnnexState @@ -157,6 +159,7 @@ newState c r = 
AnnexState , errcounter = 0 , unusedkeys = Nothing , quviversion = Nothing + , existinghooks = M.empty } {- Makes an Annex state object for the specified git repo. diff --git a/Annex/Hook.hs b/Annex/Hook.hs index 7301a0958..a6fcf7131 100644 --- a/Annex/Hook.hs +++ b/Annex/Hook.hs @@ -1,9 +1,10 @@ {- git-annex git hooks - - - Note that it's important that the scripts not change, otherwise - - removing old hooks using an old version of the script would fail. + - Note that it's important that the scripts installed by git-annex + - not change, otherwise removing old hooks using an old version of + - the script would fail. - - - Copyright 2013 Joey Hess + - Copyright 2013-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -12,12 +13,19 @@ module Annex.Hook where import Common.Annex import qualified Git.Hook as Git -import Utility.Shell import Config +import qualified Annex +import Utility.Shell +import Utility.FileMode + +import qualified Data.Map as M preCommitHook :: Git.Hook preCommitHook = Git.Hook "pre-commit" (mkHookScript "git annex pre-commit .") +preCommitAnnexHook :: Git.Hook +preCommitAnnexHook = Git.Hook "pre-commit-annex" "" + mkHookScript :: String -> String mkHookScript s = unlines [ shebang_local @@ -40,3 +48,23 @@ hookWarning :: Git.Hook -> String -> Annex () hookWarning h msg = do r <- gitRepo warning $ Git.hookName h ++ " hook (" ++ Git.hookFile h r ++ ") " ++ msg + +{- Runs a hook, if it exists and is executable; a missing hook is not an + - error. To avoid checking every time, the result of the check is cached. -} +runAnnexHook :: Git.Hook -> Annex () +runAnnexHook hook = do + cmd <- fromRepo $ Git.hookFile hook + m <- Annex.getState Annex.existinghooks + case M.lookup hook m of + Just True -> run cmd + Just False -> noop + Nothing -> do + exists <- hookexists cmd + Annex.changeState $ \s -> s + { Annex.existinghooks = M.insert hook exists m } + when exists $ + run cmd + where + hookexists f = liftIO $ catchBoolIO $ isExecutable . 
fileMode <$> getFileStatus f + run cmd = unlessM (liftIO $ boolSystem cmd []) $ + warning $ cmd ++ " failed" diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index 07d958de1..fa34ad245 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -13,6 +13,7 @@ import Config import qualified Command.Add import qualified Command.Fix import Annex.Direct +import Annex.Hook import Annex.View import Annex.View.ViewedFile import Logs.View @@ -28,13 +29,16 @@ def = [command "pre-commit" paramPaths seek SectionPlumbing seek :: CommandSeek seek ps = ifM isDirect - -- update direct mode mappings for committed files - ( withWords startDirect ps + ( do + -- update direct mode mappings for committed files + withWords startDirect ps + runAnnexHook preCommitAnnexHook , do -- fix symlinks to files being committed withFilesToBeCommitted (whenAnnexed Command.Fix.start) ps -- inject unlocked files into the annex withFilesUnlockedToBeCommitted startIndirect ps + runAnnexHook preCommitAnnexHook -- committing changes to a view updates metadata mv <- currentView case mv of @@ -43,6 +47,7 @@ seek ps = ifM isDirect (addViewMetaData v) (removeViewMetaData v) ) + startIndirect :: FilePath -> CommandStart startIndirect f = next $ do diff --git a/Command/Sync.hs b/Command/Sync.hs index f041b5d23..04086eab2 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -18,6 +18,7 @@ import qualified Types.Remote as Remote import Annex.Direct import Annex.CatFile import Annex.Link +import Annex.Hook import qualified Git.Command import qualified Git.LsFiles as LsFiles import qualified Git.Merge @@ -156,6 +157,7 @@ commitStaged commitmessage = go =<< inRepo Git.Branch.currentUnsafe where go Nothing = return False go (Just branch) = do + runAnnexHook preCommitAnnexHook parent <- inRepo $ Git.Ref.sha branch void $ inRepo $ Git.Branch.commit False commitmessage branch (maybeToList parent) diff --git a/Git/Hook.hs b/Git/Hook.hs index d56a4a565..6245a292d 100644 --- a/Git/Hook.hs +++ b/Git/Hook.hs @@ 
-15,6 +15,10 @@ data Hook = Hook { hookName :: FilePath , hookScript :: String } + +-- A hook is identified by its name alone; Eq and Ord must agree on that. +instance Eq Hook where a == b = hookName a == hookName b +instance Ord Hook where compare a b = compare (hookName a) (hookName b) hookFile :: Hook -> Repo -> FilePath hookFile h r = localGitDir r </> "hooks" </> hookName h diff --git a/Types/MetaData.hs b/Types/MetaData.hs index 6f8a300b2..c37b31c51 100644 --- a/Types/MetaData.hs +++ b/Types/MetaData.hs @@ -219,6 +219,7 @@ data ModMeta = AddMeta MetaField MetaValue | DelMeta MetaField MetaValue | SetMeta MetaField MetaValue -- removes any existing values + | MaybeSetMeta MetaField MetaValue -- when field has no existing value {- Applies a ModMeta, generating the new MetaData. - Note that the new MetaData does not include all the @@ -229,12 +230,16 @@ modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) emptyMetaDat modMeta m (SetMeta f v) = updateMetaData f v $ foldr (updateMetaData f) emptyMetaData $ map unsetMetaValue $ S.toList $ currentMetaDataValues f m +modMeta m (MaybeSetMeta f v) + | S.null (currentMetaDataValues f m) = updateMetaData f v emptyMetaData + | otherwise = emptyMetaData -{- Parses field=value, field+=value, field-=value -} +{- Parses field=value, field+=value, field-=value, field?=value -} parseModMeta :: String -> Either String ModMeta parseModMeta p = case lastMaybe f of Just '+' -> AddMeta <$> mkMetaField f' <*> v Just '-' -> DelMeta <$> mkMetaField f' <*> v + Just '?' -> MaybeSetMeta <$> mkMetaField f' <*> v _ -> SetMeta <$> mkMetaField f <*> v where (f, sv) = separate (== '=') p diff --git a/debian/changelog b/debian/changelog index edbe3bab6..907a3d1db 100644 --- a/debian/changelog +++ b/debian/changelog @@ -16,6 +16,9 @@ git-annex (5.20140228) UNRELEASED; urgency=medium using !tag and field!=value. * vadd: Allow listing multiple desired values for a field. * view: Refuse to enter a view when no branch is currently checked out. 
+ * metadata: To only set a field when it's not already got a value, use + -s field?=value + * Run .git/hooks/pre-commit-annex whenever a commit is made. -- Joey Hess Fri, 28 Feb 2014 14:52:15 -0400 diff --git a/doc/design/metadata.mdwn b/doc/design/metadata.mdwn index 264505a1c..10e79b9f8 100644 --- a/doc/design/metadata.mdwn +++ b/doc/design/metadata.mdwn @@ -56,14 +56,7 @@ once, and can be left alone when refining a view. When annex.genmetadata is set, git annex add automatically attaches some metadata to a file. Currently year and month fields, from its mtime. -TODO A git hook could be run by git annex add to gather more metadata. -For example, by examining file permisions or MP3 metadata. -Alternatively, this could be a -regular post-commit hook, that examines the files committed, and runs git -annex metadata to add metadata. No extra git-annex support is needed -to do that! -However, in direct mode, or when using the assistant, git-annex does its -own committing, not using git commit, so bypassing the commit hooks. +There's also a pre-commit-annex hook script. ## directory hierarchy metadata diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index f0b29f336..53b9c2eab 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -717,6 +717,9 @@ subdirectories). To remove a value, use -s field-=value. + To set a value, only if the field does not already have a value, + use -s field?=value + To set a tag, use -t tag, and use -u tag to remove a tag. For example, to set some tags on a file and also its author: @@ -1678,6 +1681,10 @@ used by git-annex. `~/.config/git-annex/autostart` is a list of git repositories to start the git-annex assistant in. +`.git/hooks/pre-commit-annex` in your git repository will be run whenever +a commit is made, either by git commit, git-annex sync, or the git-annex +assistant. 
+ # SEE ALSO Most of git-annex's documentation is available on its web site, diff --git a/doc/metadata.mdwn b/doc/metadata.mdwn index 9966e7d7d..df873c4c1 100644 --- a/doc/metadata.mdwn +++ b/doc/metadata.mdwn @@ -30,7 +30,8 @@ Here are some recommended metadata fields to use: being. To make git-annex automatically set the year and month when adding files, -run `git config annex.genmetadata true` +run `git config annex.genmetadata true`. Also, see +[[tips/automatically_adding_metadata]]. git-annex's metadata can be updated in a distributed fashion. For example, two users, each with their own clone of a repository, can set and unset diff --git a/doc/tips/automatically_adding_metadata.mdwn b/doc/tips/automatically_adding_metadata.mdwn new file mode 100644 index 000000000..bd8fea737 --- /dev/null +++ b/doc/tips/automatically_adding_metadata.mdwn @@ -0,0 +1,24 @@ +git-annex's [[metadata]] works best when files have a lot of useful +metadata attached to them. + +To make git-annex automatically set the year and month when adding files, +run `git config annex.genmetadata true`. + +A git commit hook can be set up to extract lots of metadata from files +like photos, mp3s, etc. + +* Install the `extract` utility, e.g. with + `apt-get install extract` +* Download [[pre-commit-annex]] and install it in your git-annex repository + as `.git/hooks/pre-commit-annex`. + Remember to make the script executable! +* Run: `git config metadata.extract "artist album title camera_make video_dimensions"` + +Now any fields you list in metadata.extract will be extracted and +stored when files are committed. + +To get a list of all possible fields, run: `extract -L | tr ' ' _` + +By default, if git-annex already has a metadata field for a file, +its value will not be overwritten with metadata taken from files. 
+To allow overwriting, run: `git config metadata.overwrite true` diff --git a/doc/tips/automatically_adding_metadata/pre-commit-annex b/doc/tips/automatically_adding_metadata/pre-commit-annex new file mode 100755 index 000000000..4897493cf --- /dev/null +++ b/doc/tips/automatically_adding_metadata/pre-commit-annex @@ -0,0 +1,61 @@ +#!/bin/sh +# This script can be used to add git-annex metadata to files when they're +# committed. +# +# Copyright 2014 Joey Hess +# License: GPL-3+ + +extract="$(git config metadata.extract || true)" +want="$(perl -e 'print (join("|", map {s/_/ /g; "^$_ - "} (split " ", shift())))' "$extract")" + +if [ -z "$want" ]; then + exit 0 +fi + +# $want is now an extended regexp such as "^artist - |^album - ". + +case "$(git config --bool metadata.overwrite || true)" in + true) + overwrite=1 + ;; + *) + overwrite="" + ;; +esac + +# By default, any existing metadata git-annex has stored is not overwritten +# with metadata taken from files. To allow overwriting, run: +# git config metadata.overwrite true + +addmeta () { + file="$1" + field="$2" + value="$3" + afield="$(echo "$field" | tr ' ' _)" + if [ "$overwrite" ]; then + p="$afield=$value" + + else + p="$afield?=$value" + fi + git -c annex.alwayscommit=false annex metadata "$file" -s "$p" --quiet +} + +if git rev-parse --verify HEAD >/dev/null 2>&1; then + against=HEAD +else + # Initial commit: diff against an empty tree object + against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 +fi + +IFS=" +" +for f in $(git diff-index --name-only --cached "$against"); do + if [ -e "$f" ]; then + for l in $(extract "$f" | grep -E "$want"); do + field="${l%% - *}" + value="${l#* - }" + addmeta "$f" "$field" "$value" + done + fi +done -- cgit v1.2.3