diff options
-rw-r--r-- | Annex/FileMatcher.hs | 119 | ||||
-rw-r--r-- | Command/MatchExpression.hs | 2 | ||||
-rw-r--r-- | Logs/PreferredContent.hs | 4 | ||||
-rw-r--r-- | debian/changelog | 3 | ||||
-rw-r--r-- | doc/git-annex-preferred-content.mdwn | 4 | ||||
-rw-r--r-- | doc/tips/largefiles.mdwn | 42 |
6 files changed, 110 insertions, 64 deletions
diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs index e3482ec8b..b4a4b6d9a 100644 --- a/Annex/FileMatcher.hs +++ b/Annex/FileMatcher.hs @@ -5,7 +5,15 @@ - Licensed under the GNU GPL version 3 or higher. -} -module Annex.FileMatcher where +module Annex.FileMatcher ( + GetFileMatcher, + checkFileMatcher, + checkMatcher, + matchAll, + preferredContentParser, + parsedToMatcher, + largeFilesMatcher, +) where import qualified Data.Map as M @@ -13,8 +21,6 @@ import Annex.Common import Limit import Utility.Matcher import Types.Group -import Logs.Group -import Annex.UUID import qualified Annex import Types.FileMatcher import Git.FilePath @@ -53,53 +59,38 @@ fileMatchInfo file = do matchAll :: FileMatcher Annex matchAll = generate [] -parsedToMatcher :: [Either String (Token (MatchFiles Annex))] -> Either String (FileMatcher Annex) +parsedToMatcher :: [ParseResult] -> Either String (FileMatcher Annex) parsedToMatcher parsed = case partitionEithers parsed of ([], vs) -> Right $ generate vs (es, _) -> Left $ unwords $ map ("Parse failure: " ++) es -exprParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token (MatchFiles Annex))] -exprParser matchstandard matchgroupwanted getgroupmap configmap mu expr = - map parse $ tokenizeMatcher expr - where - parse = parseToken - matchstandard - matchgroupwanted - (limitPresent mu) - (limitInDir preferreddir) - getgroupmap - preferreddir = fromMaybe "public" $ - M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu +data ParseToken + = SimpleToken String ParseResult + | ValueToken String (String -> ParseResult) -parseToken :: FileMatcher Annex -> FileMatcher Annex -> MatchFiles Annex -> MatchFiles Annex -> Annex GroupMap -> String -> Either String (Token (MatchFiles Annex)) -parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir getgroupmap t +type ParseResult = Either String (Token (MatchFiles Annex)) + +parseToken :: [ParseToken] -> String -> ParseResult +parseToken l t | t `elem` tokens = Right $ token t - | otherwise = case t of - "standard" -> call matchstandard - "groupwanted" -> call matchgroupwanted - "present" -> simply checkpresent - "inpreferreddir" -> simply checkpreferreddir - "unused" -> simply limitUnused - "anything" -> simply limitAnything - "nothing" -> simply limitNothing - _ -> case k of - "include" -> usev limitInclude - "exclude" -> usev limitExclude - "copies" -> usev limitCopies - "lackingcopies" -> usev $ limitLackingCopies False - "approxlackingcopies" -> usev $ limitLackingCopies True - "inbackend" -> usev limitInBackend - "largerthan" -> usev $ limitSize (>) - "smallerthan" -> usev $ limitSize (<) - "metadata" -> usev limitMetaData - "inallgroup" -> usev $ limitInAllGroup getgroupmap - _ -> Left $ "near " ++ show t + | otherwise = go l where + go [] = Left $ "near " ++ show t + go (SimpleToken s r : _) | s == t = r + go (ValueToken s mkr : _) | s == k = mkr v + go (_ : ps) = go ps (k, v) = separate (== '=') t - simply = Right . Operation - usev a = Operation <$> a v - call sub = Right $ Operation $ \notpresent mi -> - matchMrun sub $ \a -> a notpresent mi + +commonTokens :: [ParseToken] +commonTokens = + [ SimpleToken "unused" (simply limitUnused) + , SimpleToken "anything" (simply limitAnything) + , SimpleToken "nothing" (simply limitNothing) + , ValueToken "include" (usev limitInclude) + , ValueToken "exclude" (usev limitExclude) + , ValueToken "largerthan" (usev $ limitSize (>)) + , ValueToken "smallerthan" (usev $ limitSize (<)) + ] {- This is really dumb tokenization; there's no support for quoted values. - Open and close parens are always treated as standalone tokens; @@ -109,6 +100,30 @@ tokenizeMatcher = filter (not . null ) . concatMap splitparens . words where splitparens = segmentDelim (`elem` "()") +preferredContentParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [ParseResult] +preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu expr = + map parse $ tokenizeMatcher expr + where + parse = parseToken $ + [ SimpleToken "standard" (call matchstandard) + , SimpleToken "groupwanted" (call matchgroupwanted) + , SimpleToken "present" (simply $ limitPresent mu) + , SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir) + , ValueToken "copies" (usev limitCopies) + , ValueToken "lackingcopies" (usev $ limitLackingCopies False) + , ValueToken "approxlackingcopies" (usev $ limitLackingCopies True) + , ValueToken "inbacked" (usev limitInBackend) + , ValueToken "metadata" (usev limitMetaData) + , ValueToken "inallgroup" (usev $ limitInAllGroup getgroupmap) + ] ++ commonTokens + preferreddir = fromMaybe "public" $ + M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu + +largeFilesParser :: String -> [ParseResult] +largeFilesParser expr = map parse $ tokenizeMatcher expr + where + parse = parseToken commonTokens + {- Generates a matcher for files large enough (or meeting other criteria) - to be added to the annex, rather than directly to git. -} largeFilesMatcher :: Annex GetFileMatcher @@ -123,13 +138,15 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig then return matchAll else mkmatcher expr - mkmatcher expr = do - u <- getUUID - -- No need to read remote configs, that's only needed for - -- inpreferreddir, which is used in preferred content - -- expressions but does not make sense in the - -- annex.largefiles expression. - let emptyconfig = M.empty - either badexpr return $ - parsedToMatcher $ exprParser matchAll matchAll groupMap emptyconfig (Just u) expr + mkmatcher = either badexpr return . parsedToMatcher . largeFilesParser badexpr e = error $ "bad annex.largefiles configuration: " ++ e + +simply :: MatchFiles Annex -> ParseResult +simply = Right . Operation + +usev :: MkLimit Annex -> String -> ParseResult +usev a v = Operation <$> a v + +call :: FileMatcher Annex -> ParseResult +call sub = Right $ Operation $ \notpresent mi -> + matchMrun sub $ \a -> a notpresent mi diff --git a/Command/MatchExpression.hs b/Command/MatchExpression.hs index 062a46b55..a75101dce 100644 --- a/Command/MatchExpression.hs +++ b/Command/MatchExpression.hs @@ -60,7 +60,7 @@ optParser desc = MatchExpressionOptions seek :: MatchExpressionOptions -> CommandSeek seek o = do u <- getUUID - case parsedToMatcher $ exprParser matchAll matchAll groupMap M.empty (Just u) (matchexpr o) of + case parsedToMatcher $ preferredContentParser matchAll matchAll groupMap M.empty (Just u) (matchexpr o) of Left e -> liftIO $ bail $ "bad expression: " ++ e Right matcher -> ifM (checkmatcher matcher) ( liftIO exitSuccess diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs index a74effb92..d84abbaba 100644 --- a/Logs/PreferredContent.hs +++ b/Logs/PreferredContent.hs @@ -102,7 +102,7 @@ makeMatcher groupmap configmap groupwantedmap u = go True True | null (lefts tokens) = generate $ rights tokens | otherwise = unknownMatcher u where - tokens = exprParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr + tokens = preferredContentParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr matchstandard | expandstandard = maybe (unknownMatcher u) (go False False) (standardPreferredContent <$> getStandardGroup mygroups) @@ -133,7 +133,7 @@ checkPreferredContentExpression expr = case parsedToMatcher tokens of Left e -> Just e Right _ -> Nothing where - tokens = exprParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr + tokens = preferredContentParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr {- Puts a UUID in a standard group, and sets its preferred content to use - the standard expression for that group (unless preferred content is diff --git a/debian/changelog b/debian/changelog index a834e3de1..6b4373af6 100644 --- a/debian/changelog +++ b/debian/changelog @@ -10,6 +10,9 @@ git-annex (6.20160127) UNRELEASED; urgency=medium this is particulary useful for v6 repositories, since the .gitattributes configuration will apply in all clones of the repository. + * Limit annex.largefiles parsing to the subset of preferred content + expressions that make sense in its context. So, not "standard" + or "lackingcopies", etc. -- Joey Hess <id@joeyh.name> Thu, 28 Jan 2016 13:53:09 -0400 diff --git a/doc/git-annex-preferred-content.mdwn b/doc/git-annex-preferred-content.mdwn index c8c5d06af..bea82654f 100644 --- a/doc/git-annex-preferred-content.mdwn +++ b/doc/git-annex-preferred-content.mdwn @@ -32,7 +32,7 @@ elsewhere to allow removing it). # EXPRESSIONS -* `include=glob` and `exclude=glob` +* `include=glob` / `exclude=glob` Match files to include, or exclude. @@ -101,7 +101,7 @@ elsewhere to allow removing it). Matches only files that git-annex believes are present in all repositories in the specified group. -* `smallerthan=size` and `largerthan=size` +* `smallerthan=size` / `largerthan=size` Matches only files whose content is smaller than, or larger than the specified size. diff --git a/doc/tips/largefiles.mdwn b/doc/tips/largefiles.mdwn index ec51ecf66..c07d7f3f2 100644 --- a/doc/tips/largefiles.mdwn +++ b/doc/tips/largefiles.mdwn @@ -42,19 +42,45 @@ checkouts behave differently. The git configuration overrides the ## syntax +The value of annex.largefiles is similar to a +[[preferred content expression|git-annex-preferred-content]]. +The following terms can be used in annex.largefiles: + +* `include=glob` / `exclude=glob` + + Specify files to include or exclude. + +* `smallerthan=size` / `largerthan=size` + + Matches only files smaller than, or larger than the specified size. + + The size can be specified with any commonly used units, for example, + "0.5 gb" or "100 KiloBytes" + +* `anything` + + Matches any file. + +* `nothing` + + Matches no files. (Same as "not anything") + +* `not expression` + + Inverts what the expression matches. + +* `and` / `or` / `( expression )` + + These can be used to build up more complicated expressions. + The way the `.gitattributes` example above works is, `*.c` and `*.h` files -have the annex.largefiles attribute set to "nothing", which matches nothing, +have the annex.largefiles attribute set to "nothing", and so those files are never treated as large files. All other files use the other value, which checks the file size. -The value of annex.largefiles is a -[[preferred content expression|git-annex-preferred-content]] that is -used to match the large files. - Note that, since git attribute values cannot contain whitespace, -it's useful to instead parenthesize the terms of the -[[preferred content expression|git-annex-preferred-content]]. This trick -allows setting the annex.largefiles attribute to more complicated expressions. +it's useful to instead parenthesize the terms of the annex.largefiles +attribute. This trick allows for more complicated expressions. For example, this is the same as the git config shown earlier, shoehorned into a git attribute: |