summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-03-02 18:01:07 -0400
committerGravatar Joey Hess <joey@kitenet.net>2014-03-02 20:11:58 -0400
commit66c4abb748f3982ef334a3320e49bfc6647e87d9 (patch)
tree1785f2bee2250d74a01c5de2e06b8b0b65822f45
parente916f8028ce1f90e851166b35f3bcec976aa09b3 (diff)
pre-commit-annex hook script to automatically extract metadata from lots of types of files
Using the extract(1) program to do the heavy lifting. Decided to make git-annex run pre-commit-annex when committing. Since git-annex pre-commit also runs it, it'll be run when git commit is run too, via the pre-commit hook. This basically gives back the pre-commit hook that git-annex took away. The implementation avoids repeatedly looking for the hook script when the assistant is running and committing repeatedly; only checks if the hook is available once. To make the script simpler, made git-annex metadata -s field?=value only set a field when it's not already got a value. This commit was sponsored by bak.
-rw-r--r--Annex.hs3
-rw-r--r--Annex/Hook.hs36
-rw-r--r--Command/PreCommit.hs9
-rw-r--r--Command/Sync.hs2
-rw-r--r--Git/Hook.hs4
-rw-r--r--Types/MetaData.hs7
-rw-r--r--debian/changelog3
-rw-r--r--doc/design/metadata.mdwn9
-rw-r--r--doc/git-annex.mdwn7
-rw-r--r--doc/metadata.mdwn3
-rw-r--r--doc/tips/automatically_adding_metadata.mdwn24
-rwxr-xr-xdoc/tips/automatically_adding_metadata/pre-commit-annex61
12 files changed, 152 insertions, 16 deletions
diff --git a/Annex.hs b/Annex.hs
index e3bd95c33..b427efd59 100644
--- a/Annex.hs
+++ b/Annex.hs
@@ -44,6 +44,7 @@ import Git.CatFile
import Git.CheckAttr
import Git.CheckIgnore
import Git.SharedRepository
+import qualified Git.Hook
import qualified Git.Queue
import Types.Key
import Types.Backend
@@ -118,6 +119,7 @@ data AnnexState = AnnexState
, errcounter :: Integer
, unusedkeys :: Maybe (S.Set Key)
, quviversion :: Maybe QuviVersion
+ , existinghooks :: M.Map Git.Hook.Hook Bool
}
newState :: GitConfig -> Git.Repo -> AnnexState
@@ -157,6 +159,7 @@ newState c r = AnnexState
, errcounter = 0
, unusedkeys = Nothing
, quviversion = Nothing
+ , existinghooks = M.empty
}
{- Makes an Annex state object for the specified git repo.
diff --git a/Annex/Hook.hs b/Annex/Hook.hs
index 7301a0958..a6fcf7131 100644
--- a/Annex/Hook.hs
+++ b/Annex/Hook.hs
@@ -1,9 +1,10 @@
{- git-annex git hooks
-
- - Note that it's important that the scripts not change, otherwise
- - removing old hooks using an old version of the script would fail.
+ - Note that it's important that the scripts installed by git-annex
+ - not change, otherwise removing old hooks using an old version of
+ - the script would fail.
-
- - Copyright 2013 Joey Hess <joey@kitenet.net>
+ - Copyright 2013-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -12,12 +13,19 @@ module Annex.Hook where
import Common.Annex
import qualified Git.Hook as Git
-import Utility.Shell
import Config
+import qualified Annex
+import Utility.Shell
+import Utility.FileMode
+
+import qualified Data.Map as M
preCommitHook :: Git.Hook
preCommitHook = Git.Hook "pre-commit" (mkHookScript "git annex pre-commit .")
+{- The pre-commit-annex hook. Its hookScript is empty; this value only
+ - names the hook file. -}
+preCommitAnnexHook :: Git.Hook
+preCommitAnnexHook = Git.Hook "pre-commit-annex" ""
+
mkHookScript :: String -> String
mkHookScript s = unlines
[ shebang_local
@@ -40,3 +48,23 @@ hookWarning :: Git.Hook -> String -> Annex ()
hookWarning h msg = do
r <- gitRepo
warning $ Git.hookName h ++ " hook (" ++ Git.hookFile h r ++ ") " ++ msg
+
+{- Runs a hook, if its file exists and is executable.
+ -
+ - To avoid checking if the hook exists every time, the result of the
+ - check is cached in the Annex state, keyed by hook.
+ -
+ - A warning is displayed when running the hook fails. -}
+runAnnexHook :: Git.Hook -> Annex ()
+runAnnexHook hook = do
+	cmd <- fromRepo $ Git.hookFile hook
+	m <- Annex.getState Annex.existinghooks
+	case M.lookup hook m of
+		Just True -> run cmd
+		Just False -> noop
+		Nothing -> do
+			exists <- hookexists cmd
+			Annex.changeState $ \s -> s
+				{ Annex.existinghooks = M.insert hook exists m }
+			when exists $
+				run cmd
+  where
+	{- getFileStatus throws an exception when the file does not
+	 - exist, which is the usual case of no hook being installed.
+	 - Treat that the same as a hook that is not executable. -}
+	hookexists f = liftIO $ catchBoolIO $
+		isExecutable . fileMode <$> getFileStatus f
+	run cmd = unlessM (liftIO $ boolSystem cmd []) $
+		warning $ cmd ++ " failed"
diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs
index 07d958de1..fa34ad245 100644
--- a/Command/PreCommit.hs
+++ b/Command/PreCommit.hs
@@ -13,6 +13,7 @@ import Config
import qualified Command.Add
import qualified Command.Fix
import Annex.Direct
+import Annex.Hook
import Annex.View
import Annex.View.ViewedFile
import Logs.View
@@ -28,13 +29,16 @@ def = [command "pre-commit" paramPaths seek SectionPlumbing
seek :: CommandSeek
seek ps = ifM isDirect
- -- update direct mode mappings for committed files
- ( withWords startDirect ps
+ ( do
+ -- update direct mode mappings for committed files
+ withWords startDirect ps
+ runAnnexHook preCommitAnnexHook
, do
-- fix symlinks to files being committed
withFilesToBeCommitted (whenAnnexed Command.Fix.start) ps
-- inject unlocked files into the annex
withFilesUnlockedToBeCommitted startIndirect ps
+ runAnnexHook preCommitAnnexHook
-- committing changes to a view updates metadata
mv <- currentView
case mv of
@@ -43,6 +47,7 @@ seek ps = ifM isDirect
(addViewMetaData v)
(removeViewMetaData v)
)
+
startIndirect :: FilePath -> CommandStart
startIndirect f = next $ do
diff --git a/Command/Sync.hs b/Command/Sync.hs
index f041b5d23..04086eab2 100644
--- a/Command/Sync.hs
+++ b/Command/Sync.hs
@@ -18,6 +18,7 @@ import qualified Types.Remote as Remote
import Annex.Direct
import Annex.CatFile
import Annex.Link
+import Annex.Hook
import qualified Git.Command
import qualified Git.LsFiles as LsFiles
import qualified Git.Merge
@@ -156,6 +157,7 @@ commitStaged commitmessage = go =<< inRepo Git.Branch.currentUnsafe
where
go Nothing = return False
go (Just branch) = do
+ runAnnexHook preCommitAnnexHook
parent <- inRepo $ Git.Ref.sha branch
void $ inRepo $ Git.Branch.commit False commitmessage branch
(maybeToList parent)
diff --git a/Git/Hook.hs b/Git/Hook.hs
index d56a4a565..6245a292d 100644
--- a/Git/Hook.hs
+++ b/Git/Hook.hs
@@ -15,6 +15,10 @@ data Hook = Hook
{ hookName :: FilePath
, hookScript :: String
}
+
+{- Hooks are identified by name alone; the script is not compared. -}
+instance Eq Hook where
+	a == b = hookName a == hookName b
+
+{- Ordered by name only, so that Ord agrees with Eq. A derived Ord
+ - would also compare hookScript, so two hooks that are == could still
+ - compare as different. -}
+instance Ord Hook where
+	compare a b = compare (hookName a) (hookName b)
hookFile :: Hook -> Repo -> FilePath
hookFile h r = localGitDir r </> "hooks" </> hookName h
diff --git a/Types/MetaData.hs b/Types/MetaData.hs
index 6f8a300b2..c37b31c51 100644
--- a/Types/MetaData.hs
+++ b/Types/MetaData.hs
@@ -219,6 +219,7 @@ data ModMeta
= AddMeta MetaField MetaValue
| DelMeta MetaField MetaValue
| SetMeta MetaField MetaValue -- removes any existing values
+ | MaybeSetMeta MetaField MetaValue -- when field has no existing value
{- Applies a ModMeta, generating the new MetaData.
- Note that the new MetaData does not include all the
@@ -229,12 +230,16 @@ modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) emptyMetaDat
modMeta m (SetMeta f v) = updateMetaData f v $
foldr (updateMetaData f) emptyMetaData $
map unsetMetaValue $ S.toList $ currentMetaDataValues f m
+modMeta m (MaybeSetMeta f v)
+ | S.null (currentMetaDataValues f m) = updateMetaData f v emptyMetaData
+ | otherwise = emptyMetaData
-{- Parses field=value, field+=value, field-=value -}
+{- Parses field=value, field+=value, field-=value, field?=value -}
parseModMeta :: String -> Either String ModMeta
parseModMeta p = case lastMaybe f of
Just '+' -> AddMeta <$> mkMetaField f' <*> v
Just '-' -> DelMeta <$> mkMetaField f' <*> v
+ Just '?' -> MaybeSetMeta <$> mkMetaField f' <*> v
_ -> SetMeta <$> mkMetaField f <*> v
where
(f, sv) = separate (== '=') p
diff --git a/debian/changelog b/debian/changelog
index edbe3bab6..907a3d1db 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -16,6 +16,9 @@ git-annex (5.20140228) UNRELEASED; urgency=medium
using !tag and field!=value.
* vadd: Allow listing multiple desired values for a field.
* view: Refuse to enter a view when no branch is currently checked out.
+ * metadata: To only set a field when it's not already got a value, use
+ -s field?=value
+ * Run .git/hooks/pre-commit-annex whenever a commit is made.
-- Joey Hess <joeyh@debian.org> Fri, 28 Feb 2014 14:52:15 -0400
diff --git a/doc/design/metadata.mdwn b/doc/design/metadata.mdwn
index 264505a1c..10e79b9f8 100644
--- a/doc/design/metadata.mdwn
+++ b/doc/design/metadata.mdwn
@@ -56,14 +56,7 @@ once, and can be left alone when refining a view.
When annex.genmetadata is set, git annex add automatically attaches
some metadata to a file. Currently year and month fields, from its mtime.
-TODO A git hook could be run by git annex add to gather more metadata.
-For example, by examining file permisions or MP3 metadata.
-Alternatively, this could be a
-regular post-commit hook, that examines the files committed, and runs git
-annex metadata to add metadata. No extra git-annex support is needed
-to do that!
-However, in direct mode, or when using the assistant, git-annex does its
-own committing, not using git commit, so bypassing the commit hooks.
+There's also a pre-commit-annex hook script.
## directory hierarchy metadata
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index f0b29f336..53b9c2eab 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -717,6 +717,9 @@ subdirectories).
To remove a value, use -s field-=value.
+ To set a value, only if the field does not already have a value,
+ use -s field?=value
+
To set a tag, use -t tag, and use -u tag to remove a tag.
For example, to set some tags on a file and also its author:
@@ -1678,6 +1681,10 @@ used by git-annex.
`~/.config/git-annex/autostart` is a list of git repositories
to start the git-annex assistant in.
+`.git/hooks/pre-commit-annex` in your git repository will be run whenever
+a commit is made, either by git commit, git-annex sync, or the git-annex
+assistant.
+
# SEE ALSO
Most of git-annex's documentation is available on its web site,
diff --git a/doc/metadata.mdwn b/doc/metadata.mdwn
index 9966e7d7d..df873c4c1 100644
--- a/doc/metadata.mdwn
+++ b/doc/metadata.mdwn
@@ -30,7 +30,8 @@ Here are some recommended metadata fields to use:
being.
To make git-annex automatically set the year and month when adding files,
-run `git config annex.genmetadata true`
+run `git config annex.genmetadata true`. Also, see
+[[tips/automatically_adding_metadata]].
git-annex's metadata can be updated in a distributed fashion. For example,
two users, each with their own clone of a repository, can set and unset
diff --git a/doc/tips/automatically_adding_metadata.mdwn b/doc/tips/automatically_adding_metadata.mdwn
new file mode 100644
index 000000000..bd8fea737
--- /dev/null
+++ b/doc/tips/automatically_adding_metadata.mdwn
@@ -0,0 +1,24 @@
+git-annex's [[metadata]] works best when files have a lot of useful
+metadata attached to them.
+
+To make git-annex automatically set the year and month when adding files,
+run `git config annex.genmetadata true`.
+
+A git commit hook can be set up to extract lots of metadata from files
+like photos, mp3s, etc.
+
+* Install the `extract` utility, from <http://www.gnu.org/software/libextractor/>
+ `apt-get install extract`
+* Download [[pre-commit-annex]] and install it in your git-annex repository
+ as `.git/hooks/pre-commit-annex`.
+ Remember to make the script executable!
+* Run: `git config metadata.extract "artist album title camera_make video_dimensions"`
+
+Now any fields you list in metadata.extract will be extracted and
+stored when files are committed.
+
+To get a list of all possible fields, run: `extract -L | sed 's/ /_/g'`
+
+By default, if git-annex already has a metadata field for a file,
+its value will not be overwritten with metadata taken from files.
+To allow overwriting, run: `git config metadata.overwrite true`
diff --git a/doc/tips/automatically_adding_metadata/pre-commit-annex b/doc/tips/automatically_adding_metadata/pre-commit-annex
new file mode 100755
index 000000000..4897493cf
--- /dev/null
+++ b/doc/tips/automatically_adding_metadata/pre-commit-annex
@@ -0,0 +1,61 @@
+#!/bin/sh
+# This script can be used to add git-annex metadata to files when they're
+# committed.
+#
+# It reads two git config settings:
+#   metadata.extract    whitespace-separated list of fields to extract,
+#                       with spaces in field names written as underscores
+#   metadata.overwrite  when true, overwrite metadata that git-annex
+#                       already has stored for a field
+#
+# Copyright 2014 Joey Hess <id@joeyh.name>
+# License: GPL-3+
+
+extract="$(git config metadata.extract || true)"
+# Turn the field list into a regexp alternation, one "^field - " branch
+# per field, with underscores converted back to spaces.
+want="$(perl -e 'print (join("|", map {s/_/ /g; "^$_ - "} (split " ", shift())))' "$extract")"
+
+# No fields are configured, so there is nothing to extract.
+if [ -z "$want" ]; then
+	exit 0
+fi
+
+case "$(git config --bool metadata.overwrite || true)" in
+	true)
+		overwrite=1
+	;;
+	*)
+		overwrite=""
+	;;
+esac
+
+# Sets one metadata field of a file.
+# Usage: addmeta file field value
+addmeta () {
+	file="$1"
+	field="$2"
+	value="$3"
+	# Spaces in the field name are written as underscores for git-annex.
+	afield="$(echo "$field" | tr ' ' _)"
+	if [ "$overwrite" ]; then
+		p="$afield=$value"
+	else
+		# field?=value only sets the field when it has no value yet.
+		p="$afield?=$value"
+	fi
+	git -c annex.alwayscommit=false annex metadata "$file" -s "$p" --quiet
+}
+
+if git rev-parse --verify HEAD >/dev/null 2>&1; then
+	against=HEAD
+else
+	# Initial commit: diff against an empty tree object
+	against=4b825dc642cb6eb9a060e54bf8d69288fbee4904
+fi
+
+# Split command output on newlines only, so that file names and extracted
+# lines containing spaces stay in one piece.
+IFS="
+"
+for f in $(git diff-index --name-only --cached "$against"); do
+	if [ -e "$f" ]; then
+		for l in $(extract "$f" | grep -E "$want"); do
+			field="${l%% - *}"
+			value="${l#* - }"
+			addmeta "$f" "$field" "$value"
+		done
+	fi
+done