From 590d47ed764c88a08f615c3af057cbff26633bc1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 3 Feb 2016 16:29:34 -0400 Subject: annex.largefiles: Add support for mimetype=text/* etc, when git-annex is linked with libmagic. --- Annex/FileMatcher.hs | 27 +++++++++++++++---- BuildFlags.hs | 3 +++ Limit.hs | 30 +++++++++++++++++----- debian/changelog | 2 ++ debian/control | 1 + doc/tips/largefiles.mdwn | 13 ++++++++++ ...st:_annex.largefiles_support_for_mimetypes.mdwn | 6 +++++ git-annex.cabal | 7 +++++ 8 files changed, 78 insertions(+), 11 deletions(-) diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs index b4a4b6d9a..70ea93984 100644 --- a/Annex/FileMatcher.hs +++ b/Annex/FileMatcher.hs @@ -5,6 +5,8 @@ - Licensed under the GNU GPL version 3 or higher. -} +{-# LANGUAGE CPP #-} + module Annex.FileMatcher ( GetFileMatcher, checkFileMatcher, @@ -28,6 +30,10 @@ import Types.Remote (RemoteConfig) import Annex.CheckAttr import Git.CheckAttr (unspecifiedAttr) +#ifdef WITH_MAGICMIME +import Magic +#endif + import Data.Either import qualified Data.Set as S @@ -119,10 +125,19 @@ preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu e preferreddir = fromMaybe "public" $ M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu -largeFilesParser :: String -> [ParseResult] -largeFilesParser expr = map parse $ tokenizeMatcher expr - where - parse = parseToken commonTokens +mkLargeFilesParser :: Annex (String -> [ParseResult]) +mkLargeFilesParser = do +#ifdef WITH_MAGICMIME + magicmime <- liftIO $ magicOpen [MagicMimeType] + liftIO $ magicLoadDefault magicmime +#endif + let parse = parseToken $ commonTokens +#ifdef WITH_MAGICMIME + ++ [ ValueToken "mimetype" (usev $ matchMagic magicmime) ] +#else + ++ [ ValueToken "mimetype" (const $ Left "\"mimetype\" not supported; not built with MagicMime support") ] +#endif + return $ map parse . tokenizeMatcher {- Generates a matcher for files large enough (or meeting other criteria) - to be added to the annex, rather than directly to git. -} @@ -138,7 +153,9 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig then return matchAll else mkmatcher expr - mkmatcher = either badexpr return . parsedToMatcher . largeFilesParser + mkmatcher expr = do + parser <- mkLargeFilesParser + either badexpr return $ parsedToMatcher $ parser expr badexpr e = error $ "bad annex.largefiles configuration: " ++ e simply :: MatchFiles Annex -> ParseResult diff --git a/BuildFlags.hs b/BuildFlags.hs index 52e489e9a..db1937276 100644 --- a/BuildFlags.hs +++ b/BuildFlags.hs @@ -76,6 +76,9 @@ buildFlags = filter (not . null) #ifdef WITH_TORRENTPARSER , "TorrentParser" #endif +#ifdef WITH_MAGICMIME + , "MagicMime" +#endif #ifdef WITH_EKG , "EKG" #endif diff --git a/Limit.hs b/Limit.hs index 81b6d7564..e48182eaf 100644 --- a/Limit.hs +++ b/Limit.hs @@ -1,10 +1,12 @@ {- user-specified limits on files to act on - - - Copyright 2011-2014 Joey Hess + - Copyright 2011-2016 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} +{-# LANGUAGE CPP #-} + module Limit where import Annex.Common @@ -30,6 +32,10 @@ import Utility.Glob import Utility.HumanTime import Utility.DataUnits +#ifdef WITH_MAGICMIME +import Magic +#endif + import Data.Time.Clock.POSIX import qualified Data.Set as S import qualified Data.Map as M @@ -84,11 +90,23 @@ limitExclude glob = Right $ const $ not <$$> matchGlobFile glob matchGlobFile :: String -> MatchInfo -> Annex Bool matchGlobFile glob = go - where - cglob = compileGlob glob CaseSensative -- memoized - go (MatchingKey _) = pure False - go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi) - go (MatchingInfo af _ _) = matchGlob cglob <$> getInfo af + where + cglob = compileGlob glob CaseSensative -- memoized + go (MatchingKey _) = pure False + go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi) + go (MatchingInfo af _ _) = matchGlob cglob <$> getInfo af + +#ifdef WITH_MAGICMIME +matchMagic :: Magic -> MkLimit Annex +matchMagic magic glob = Right $ const go + where + cglob = compileGlob glob CaseSensative -- memoized + go (MatchingKey _) = pure False + go (MatchingFile fi) = check (matchFile fi) + go (MatchingInfo af _ _) = check =<< getInfo af + check f = liftIO $ catchBoolIO $ + matchGlob cglob <$> magicFile magic f +#endif {- Adds a limit to skip files not believed to be present - in a specfied repository. Optionally on a prior date. -} diff --git a/debian/changelog b/debian/changelog index 6b4373af6..42951687e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -13,6 +13,8 @@ git-annex (6.20160127) UNRELEASED; urgency=medium * Limit annex.largefiles parsing to the subset of preferred content expressions that make sense in its context. So, not "standard" or "lackingcopies", etc. + * annex.largefiles: Add support for mimetype=text/* etc, when git-annex + is linked with libmagic. -- Joey Hess Thu, 28 Jan 2016 13:53:09 -0400 diff --git a/debian/control b/debian/control index f9025eb4f..3ebfbc2e4 100644 --- a/debian/control +++ b/debian/control @@ -72,6 +72,7 @@ Build-Depends: libghc-optparse-applicative-dev (>= 0.11.0), libghc-torrent-dev, libghc-concurrent-output-dev, + libghc-magic-dev, lsof [linux-any], ikiwiki, perlmagick, diff --git a/doc/tips/largefiles.mdwn b/doc/tips/largefiles.mdwn index c07d7f3f2..40cd3eafb 100644 --- a/doc/tips/largefiles.mdwn +++ b/doc/tips/largefiles.mdwn @@ -50,6 +50,8 @@ The following terms can be used in annex.largefiles: Specify files to include or exclude. + The glob can contain `*` and `?` to match arbitrary characters. + * `smallerthan=size` / `largerthan=size` Matches only files smaller than, or larger than the specified size. @@ -57,6 +59,17 @@ The following terms can be used in annex.largefiles: The size can be specified with any commonly used units, for example, "0.5 gb" or "100 KiloBytes" +* `mimetype=glob` + + Looks up the MIME type of a file, and checks if the glob matches it. + + For example, "mimetype=text/*" will match many varieties of text files, + including "text/plain", but also "text/x-shellscript", "text/x-makefile", + etc. + + This is only available to use when git-annex was built with the + MagicMime build flag. + * `anything` Matches any file. diff --git a/doc/todo/wishlist:_annex.largefiles_support_for_mimetypes.mdwn b/doc/todo/wishlist:_annex.largefiles_support_for_mimetypes.mdwn index f38e41dd3..b64eb45cc 100644 --- a/doc/todo/wishlist:_annex.largefiles_support_for_mimetypes.mdwn +++ b/doc/todo/wishlist:_annex.largefiles_support_for_mimetypes.mdwn @@ -1 +1,7 @@ It would be nice to have mimetype support on the `annex.largefiles` configuration directive. F.e. `git config annex.largefiles "not mimetype=text/plain"` + +> [[done]]; Implemented support for mimetype=text/plain or even +> mimetype=text/* +> +> Decided not to add external command test support, at least not for now. +> --[[Joey]] diff --git a/git-annex.cabal b/git-annex.cabal index b2c3647a7..a622a04e5 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -63,6 +63,9 @@ Flag TestSuite Flag TorrentParser Description: Use haskell torrent library to parse torrent files +Flag MagicMime + Description: Use libmagic to determine file MIME types + Flag ConcurrentOutput Description: Use concurrent-output library @@ -218,6 +221,10 @@ Executable git-annex Build-Depends: torrent (>= 10000.0.0) CPP-Options: -DWITH_TORRENTPARSER + if flag(MagicMime) + Build-Depends: magic + CPP-Options: -DWITH_MAGICMIME + if flag(ConcurrentOutput) Build-Depends: concurrent-output (>= 1.6) CPP-Options: -DWITH_CONCURRENTOUTPUT -- cgit v1.2.3