summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joeyh@joeyh.name>2016-02-03 14:56:34 -0400
committerGravatar Joey Hess <joeyh@joeyh.name>2016-02-03 15:04:42 -0400
commit11c3a7cd331f7b07e62493a4e8e37e0cecffc5b0 (patch)
tree746fb1ccdfe06f8eb093a0d2d4a2757a8090d3e3
parent01c701b5a5c18cfa952394ae1cbd2249cfd08f51 (diff)
Limit annex.largefiles parsing to the subset of preferred content expressions that make sense in its context.
So, not "standard" or "lackingcopies", etc.
-rw-r--r--Annex/FileMatcher.hs119
-rw-r--r--Command/MatchExpression.hs2
-rw-r--r--Logs/PreferredContent.hs4
-rw-r--r--debian/changelog3
-rw-r--r--doc/git-annex-preferred-content.mdwn4
-rw-r--r--doc/tips/largefiles.mdwn42
6 files changed, 110 insertions, 64 deletions
diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs
index e3482ec8b..b4a4b6d9a 100644
--- a/Annex/FileMatcher.hs
+++ b/Annex/FileMatcher.hs
@@ -5,7 +5,15 @@
- Licensed under the GNU GPL version 3 or higher.
-}
-module Annex.FileMatcher where
+module Annex.FileMatcher (
+ GetFileMatcher,
+ checkFileMatcher,
+ checkMatcher,
+ matchAll,
+ preferredContentParser,
+ parsedToMatcher,
+ largeFilesMatcher,
+) where
import qualified Data.Map as M
@@ -13,8 +21,6 @@ import Annex.Common
import Limit
import Utility.Matcher
import Types.Group
-import Logs.Group
-import Annex.UUID
import qualified Annex
import Types.FileMatcher
import Git.FilePath
@@ -53,53 +59,38 @@ fileMatchInfo file = do
matchAll :: FileMatcher Annex
matchAll = generate []
-parsedToMatcher :: [Either String (Token (MatchFiles Annex))] -> Either String (FileMatcher Annex)
+parsedToMatcher :: [ParseResult] -> Either String (FileMatcher Annex)
parsedToMatcher parsed = case partitionEithers parsed of
([], vs) -> Right $ generate vs
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
-exprParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token (MatchFiles Annex))]
-exprParser matchstandard matchgroupwanted getgroupmap configmap mu expr =
- map parse $ tokenizeMatcher expr
- where
- parse = parseToken
- matchstandard
- matchgroupwanted
- (limitPresent mu)
- (limitInDir preferreddir)
- getgroupmap
- preferreddir = fromMaybe "public" $
- M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
+data ParseToken
+ = SimpleToken String ParseResult
+ | ValueToken String (String -> ParseResult)
-parseToken :: FileMatcher Annex -> FileMatcher Annex -> MatchFiles Annex -> MatchFiles Annex -> Annex GroupMap -> String -> Either String (Token (MatchFiles Annex))
-parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir getgroupmap t
+type ParseResult = Either String (Token (MatchFiles Annex))
+
+parseToken :: [ParseToken] -> String -> ParseResult
+parseToken l t
| t `elem` tokens = Right $ token t
- | otherwise = case t of
- "standard" -> call matchstandard
- "groupwanted" -> call matchgroupwanted
- "present" -> simply checkpresent
- "inpreferreddir" -> simply checkpreferreddir
- "unused" -> simply limitUnused
- "anything" -> simply limitAnything
- "nothing" -> simply limitNothing
- _ -> case k of
- "include" -> usev limitInclude
- "exclude" -> usev limitExclude
- "copies" -> usev limitCopies
- "lackingcopies" -> usev $ limitLackingCopies False
- "approxlackingcopies" -> usev $ limitLackingCopies True
- "inbackend" -> usev limitInBackend
- "largerthan" -> usev $ limitSize (>)
- "smallerthan" -> usev $ limitSize (<)
- "metadata" -> usev limitMetaData
- "inallgroup" -> usev $ limitInAllGroup getgroupmap
- _ -> Left $ "near " ++ show t
+ | otherwise = go l
where
+ go [] = Left $ "near " ++ show t
+ go (SimpleToken s r : _) | s == t = r
+ go (ValueToken s mkr : _) | s == k = mkr v
+ go (_ : ps) = go ps
(k, v) = separate (== '=') t
- simply = Right . Operation
- usev a = Operation <$> a v
- call sub = Right $ Operation $ \notpresent mi ->
- matchMrun sub $ \a -> a notpresent mi
+
+commonTokens :: [ParseToken]
+commonTokens =
+ [ SimpleToken "unused" (simply limitUnused)
+ , SimpleToken "anything" (simply limitAnything)
+ , SimpleToken "nothing" (simply limitNothing)
+ , ValueToken "include" (usev limitInclude)
+ , ValueToken "exclude" (usev limitExclude)
+ , ValueToken "largerthan" (usev $ limitSize (>))
+ , ValueToken "smallerthan" (usev $ limitSize (<))
+ ]
{- This is really dumb tokenization; there's no support for quoted values.
- Open and close parens are always treated as standalone tokens;
@@ -109,6 +100,30 @@ tokenizeMatcher = filter (not . null ) . concatMap splitparens . words
where
splitparens = segmentDelim (`elem` "()")
+preferredContentParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [ParseResult]
+preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu expr =
+ map parse $ tokenizeMatcher expr
+ where
+ parse = parseToken $
+ [ SimpleToken "standard" (call matchstandard)
+ , SimpleToken "groupwanted" (call matchgroupwanted)
+ , SimpleToken "present" (simply $ limitPresent mu)
+ , SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
+ , ValueToken "copies" (usev limitCopies)
+ , ValueToken "lackingcopies" (usev $ limitLackingCopies False)
+ , ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
+ , ValueToken "inbacked" (usev limitInBackend)
+ , ValueToken "metadata" (usev limitMetaData)
+ , ValueToken "inallgroup" (usev $ limitInAllGroup getgroupmap)
+ ] ++ commonTokens
+ preferreddir = fromMaybe "public" $
+ M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
+
+largeFilesParser :: String -> [ParseResult]
+largeFilesParser expr = map parse $ tokenizeMatcher expr
+ where
+ parse = parseToken commonTokens
+
{- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git. -}
largeFilesMatcher :: Annex GetFileMatcher
@@ -123,13 +138,15 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
then return matchAll
else mkmatcher expr
- mkmatcher expr = do
- u <- getUUID
- -- No need to read remote configs, that's only needed for
- -- inpreferreddir, which is used in preferred content
- -- expressions but does not make sense in the
- -- annex.largefiles expression.
- let emptyconfig = M.empty
- either badexpr return $
- parsedToMatcher $ exprParser matchAll matchAll groupMap emptyconfig (Just u) expr
+ mkmatcher = either badexpr return . parsedToMatcher . largeFilesParser
badexpr e = error $ "bad annex.largefiles configuration: " ++ e
+
+simply :: MatchFiles Annex -> ParseResult
+simply = Right . Operation
+
+usev :: MkLimit Annex -> String -> ParseResult
+usev a v = Operation <$> a v
+
+call :: FileMatcher Annex -> ParseResult
+call sub = Right $ Operation $ \notpresent mi ->
+ matchMrun sub $ \a -> a notpresent mi
diff --git a/Command/MatchExpression.hs b/Command/MatchExpression.hs
index 062a46b55..a75101dce 100644
--- a/Command/MatchExpression.hs
+++ b/Command/MatchExpression.hs
@@ -60,7 +60,7 @@ optParser desc = MatchExpressionOptions
seek :: MatchExpressionOptions -> CommandSeek
seek o = do
u <- getUUID
- case parsedToMatcher $ exprParser matchAll matchAll groupMap M.empty (Just u) (matchexpr o) of
+ case parsedToMatcher $ preferredContentParser matchAll matchAll groupMap M.empty (Just u) (matchexpr o) of
Left e -> liftIO $ bail $ "bad expression: " ++ e
Right matcher -> ifM (checkmatcher matcher)
( liftIO exitSuccess
diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs
index a74effb92..d84abbaba 100644
--- a/Logs/PreferredContent.hs
+++ b/Logs/PreferredContent.hs
@@ -102,7 +102,7 @@ makeMatcher groupmap configmap groupwantedmap u = go True True
| null (lefts tokens) = generate $ rights tokens
| otherwise = unknownMatcher u
where
- tokens = exprParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr
+ tokens = preferredContentParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr
matchstandard
| expandstandard = maybe (unknownMatcher u) (go False False)
(standardPreferredContent <$> getStandardGroup mygroups)
@@ -133,7 +133,7 @@ checkPreferredContentExpression expr = case parsedToMatcher tokens of
Left e -> Just e
Right _ -> Nothing
where
- tokens = exprParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr
+ tokens = preferredContentParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr
{- Puts a UUID in a standard group, and sets its preferred content to use
- the standard expression for that group (unless preferred content is
diff --git a/debian/changelog b/debian/changelog
index a834e3de1..6b4373af6 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -10,6 +10,9 @@ git-annex (6.20160127) UNRELEASED; urgency=medium
this is particulary useful for v6 repositories, since the
.gitattributes configuration will apply in all clones of the
repository.
+ * Limit annex.largefiles parsing to the subset of preferred content
+ expressions that make sense in its context. So, not "standard"
+ or "lackingcopies", etc.
-- Joey Hess <id@joeyh.name> Thu, 28 Jan 2016 13:53:09 -0400
diff --git a/doc/git-annex-preferred-content.mdwn b/doc/git-annex-preferred-content.mdwn
index c8c5d06af..bea82654f 100644
--- a/doc/git-annex-preferred-content.mdwn
+++ b/doc/git-annex-preferred-content.mdwn
@@ -32,7 +32,7 @@ elsewhere to allow removing it).
# EXPRESSIONS
-* `include=glob` and `exclude=glob`
+* `include=glob` / `exclude=glob`
Match files to include, or exclude.
@@ -101,7 +101,7 @@ elsewhere to allow removing it).
Matches only files that git-annex believes are present in all repositories
in the specified group.
-* `smallerthan=size` and `largerthan=size`
+* `smallerthan=size` / `largerthan=size`
Matches only files whose content is smaller than, or larger than the
specified size.
diff --git a/doc/tips/largefiles.mdwn b/doc/tips/largefiles.mdwn
index ec51ecf66..c07d7f3f2 100644
--- a/doc/tips/largefiles.mdwn
+++ b/doc/tips/largefiles.mdwn
@@ -42,19 +42,45 @@ checkouts behave differently. The git configuration overrides the
## syntax
+The value of annex.largefiles is similar to a
+[[preferred content expression|git-annex-preferred-content]].
+The following terms can be used in annex.largefiles:
+
+* `include=glob` / `exclude=glob`
+
+ Specify files to include or exclude.
+
+* `smallerthan=size` / `largerthan=size`
+
+ Matches only files smaller than, or larger than the specified size.
+
+ The size can be specified with any commonly used units, for example,
+ "0.5 gb" or "100 KiloBytes"
+
+* `anything`
+
+ Matches any file.
+
+* `nothing`
+
+ Matches no files. (Same as "not anything")
+
+* `not expression`
+
+ Inverts what the expression matches.
+
+* `and` / `or` / `( expression )`
+
+ These can be used to build up more complicated expressions.
+
The way the `.gitattributes` example above works is, `*.c` and `*.h` files
-have the annex.largefiles attribute set to "nothing", which matches nothing,
+have the annex.largefiles attribute set to "nothing",
and so those files are never treated as large files. All other files use
the other value, which checks the file size.
-The value of annex.largefiles is a
-[[preferred content expression|git-annex-preferred-content]] that is
-used to match the large files.
-
Note that, since git attribute values cannot contain whitespace,
-it's useful to instead parenthesize the terms of the
-[[preferred content expression|git-annex-preferred-content]]. This trick
-allows setting the annex.largefiles attribute to more complicated expressions.
+it's useful to instead parenthesize the terms of the annex.largefiles
+attribute. This trick allows for more complicated expressions.
For example, this is the same as the git config shown earlier, shoehorned
into a git attribute: