summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2016-02-03 16:29:34 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2016-02-03 16:29:34 -0400
commit590d47ed764c88a08f615c3af057cbff26633bc1 (patch)
tree4ea91c9c5cb34929df631fc74923ee8d5248471c
parent3f1649ef32cb7f7597473dae6a0e92ff875cea45 (diff)
annex.largefiles: Add support for mimetype=text/* etc, when git-annex is linked with libmagic.
-rw-r--r--Annex/FileMatcher.hs27
-rw-r--r--BuildFlags.hs3
-rw-r--r--Limit.hs30
-rw-r--r--debian/changelog2
-rw-r--r--debian/control1
-rw-r--r--doc/tips/largefiles.mdwn13
-rw-r--r--doc/todo/wishlist:_annex.largefiles_support_for_mimetypes.mdwn6
-rw-r--r--git-annex.cabal7
8 files changed, 78 insertions, 11 deletions
diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs
index b4a4b6d9a..70ea93984 100644
--- a/Annex/FileMatcher.hs
+++ b/Annex/FileMatcher.hs
@@ -5,6 +5,8 @@
- Licensed under the GNU GPL version 3 or higher.
-}
+{-# LANGUAGE CPP #-}
+
module Annex.FileMatcher (
GetFileMatcher,
checkFileMatcher,
@@ -28,6 +30,10 @@ import Types.Remote (RemoteConfig)
import Annex.CheckAttr
import Git.CheckAttr (unspecifiedAttr)
+#ifdef WITH_MAGICMIME
+import Magic
+#endif
+
import Data.Either
import qualified Data.Set as S
@@ -119,10 +125,19 @@ preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu e
preferreddir = fromMaybe "public" $
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
-largeFilesParser :: String -> [ParseResult]
-largeFilesParser expr = map parse $ tokenizeMatcher expr
- where
- parse = parseToken commonTokens
+mkLargeFilesParser :: Annex (String -> [ParseResult])
+mkLargeFilesParser = do
+#ifdef WITH_MAGICMIME
+ magicmime <- liftIO $ magicOpen [MagicMimeType]
+ liftIO $ magicLoadDefault magicmime
+#endif
+ let parse = parseToken $ commonTokens
+#ifdef WITH_MAGICMIME
+ ++ [ ValueToken "mimetype" (usev $ matchMagic magicmime) ]
+#else
+ ++ [ ValueToken "mimetype" (const $ Left "\"mimetype\" not supported; not built with MagicMime support") ]
+#endif
+ return $ map parse . tokenizeMatcher
{- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git. -}
@@ -138,7 +153,9 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
then return matchAll
else mkmatcher expr
- mkmatcher = either badexpr return . parsedToMatcher . largeFilesParser
+ mkmatcher expr = do
+ parser <- mkLargeFilesParser
+ either badexpr return $ parsedToMatcher $ parser expr
badexpr e = error $ "bad annex.largefiles configuration: " ++ e
simply :: MatchFiles Annex -> ParseResult
diff --git a/BuildFlags.hs b/BuildFlags.hs
index 52e489e9a..db1937276 100644
--- a/BuildFlags.hs
+++ b/BuildFlags.hs
@@ -76,6 +76,9 @@ buildFlags = filter (not . null)
#ifdef WITH_TORRENTPARSER
, "TorrentParser"
#endif
+#ifdef WITH_MAGICMIME
+ , "MagicMime"
+#endif
#ifdef WITH_EKG
, "EKG"
#endif
diff --git a/Limit.hs b/Limit.hs
index 81b6d7564..e48182eaf 100644
--- a/Limit.hs
+++ b/Limit.hs
@@ -1,10 +1,12 @@
{- user-specified limits on files to act on
-
- - Copyright 2011-2014 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
+{-# LANGUAGE CPP #-}
+
module Limit where
import Annex.Common
@@ -30,6 +32,10 @@ import Utility.Glob
import Utility.HumanTime
import Utility.DataUnits
+#ifdef WITH_MAGICMIME
+import Magic
+#endif
+
import Data.Time.Clock.POSIX
import qualified Data.Set as S
import qualified Data.Map as M
@@ -84,11 +90,23 @@ limitExclude glob = Right $ const $ not <$$> matchGlobFile glob
matchGlobFile :: String -> MatchInfo -> Annex Bool
matchGlobFile glob = go
- where
- cglob = compileGlob glob CaseSensative -- memoized
- go (MatchingKey _) = pure False
- go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
- go (MatchingInfo af _ _) = matchGlob cglob <$> getInfo af
+ where
+ cglob = compileGlob glob CaseSensative -- memoized
+ go (MatchingKey _) = pure False
+ go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
+ go (MatchingInfo af _ _) = matchGlob cglob <$> getInfo af
+
+#ifdef WITH_MAGICMIME
+matchMagic :: Magic -> MkLimit Annex
+matchMagic magic glob = Right $ const go
+ where
+ cglob = compileGlob glob CaseSensative -- memoized
+ go (MatchingKey _) = pure False
+ go (MatchingFile fi) = check (matchFile fi)
+ go (MatchingInfo af _ _) = check =<< getInfo af
+ check f = liftIO $ catchBoolIO $
+ matchGlob cglob <$> magicFile magic f
+#endif
{- Adds a limit to skip files not believed to be present
- in a specfied repository. Optionally on a prior date. -}
diff --git a/debian/changelog b/debian/changelog
index 6b4373af6..42951687e 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -13,6 +13,8 @@ git-annex (6.20160127) UNRELEASED; urgency=medium
* Limit annex.largefiles parsing to the subset of preferred content
expressions that make sense in its context. So, not "standard"
or "lackingcopies", etc.
+ * annex.largefiles: Add support for mimetype=text/* etc, when git-annex
+ is linked with libmagic.
-- Joey Hess <id@joeyh.name> Thu, 28 Jan 2016 13:53:09 -0400
diff --git a/debian/control b/debian/control
index f9025eb4f..3ebfbc2e4 100644
--- a/debian/control
+++ b/debian/control
@@ -72,6 +72,7 @@ Build-Depends:
libghc-optparse-applicative-dev (>= 0.11.0),
libghc-torrent-dev,
libghc-concurrent-output-dev,
+ libghc-magic-dev,
lsof [linux-any],
ikiwiki,
perlmagick,
diff --git a/doc/tips/largefiles.mdwn b/doc/tips/largefiles.mdwn
index c07d7f3f2..40cd3eafb 100644
--- a/doc/tips/largefiles.mdwn
+++ b/doc/tips/largefiles.mdwn
@@ -50,6 +50,8 @@ The following terms can be used in annex.largefiles:
Specify files to include or exclude.
+ The glob can contain `*` and `?` to match arbitrary characters.
+
* `smallerthan=size` / `largerthan=size`
Matches only files smaller than, or larger than the specified size.
@@ -57,6 +59,17 @@ The following terms can be used in annex.largefiles:
The size can be specified with any commonly used units, for example,
"0.5 gb" or "100 KiloBytes"
+* `mimetype=glob`
+
+ Looks up the MIME type of a file, and checks if the glob matches it.
+
+ For example, "mimetype=text/*" will match many varieties of text files,
+ including "text/plain", but also "text/x-shellscript", "text/x-makefile",
+ etc.
+
+ This is only available to use when git-annex was built with the
+ MagicMime build flag.
+
* `anything`
Matches any file.
diff --git a/doc/todo/wishlist:_annex.largefiles_support_for_mimetypes.mdwn b/doc/todo/wishlist:_annex.largefiles_support_for_mimetypes.mdwn
index f38e41dd3..b64eb45cc 100644
--- a/doc/todo/wishlist:_annex.largefiles_support_for_mimetypes.mdwn
+++ b/doc/todo/wishlist:_annex.largefiles_support_for_mimetypes.mdwn
@@ -1 +1,7 @@
It would be nice to have mimetype support on the `annex.largefiles` configuration directive. F.e. `git config annex.largefiles "not mimetype=text/plain"`
+
+> [[done]]; Implemented support for mimetype=text/plain or even
+> mimetype=text/*
+>
+> Decided not to add external command test support, at least not for now.
+> --[[Joey]]
diff --git a/git-annex.cabal b/git-annex.cabal
index b2c3647a7..a622a04e5 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -63,6 +63,9 @@ Flag TestSuite
Flag TorrentParser
Description: Use haskell torrent library to parse torrent files
+Flag MagicMime
+ Description: Use libmagic to determine file MIME types
+
Flag ConcurrentOutput
Description: Use concurrent-output library
@@ -218,6 +221,10 @@ Executable git-annex
Build-Depends: torrent (>= 10000.0.0)
CPP-Options: -DWITH_TORRENTPARSER
+ if flag(MagicMime)
+ Build-Depends: magic
+ CPP-Options: -DWITH_MAGICMIME
+
if flag(ConcurrentOutput)
Build-Depends: concurrent-output (>= 1.6)
CPP-Options: -DWITH_CONCURRENTOUTPUT