aboutsummaryrefslogtreecommitdiff
path: root/Annex/FileMatcher.hs
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2013-03-29 16:17:13 -0400
committerGravatar Joey Hess <joey@kitenet.net>2013-03-29 16:17:13 -0400
commit1e5aca5087e573aa93b4b6efe7c6f5abd90d0001 (patch)
treee54552bdc584b338d58003eb6015074cf89465c6 /Annex/FileMatcher.hs
parent7d7b03e9ceab25efad67fc99e5b0813d210b1381 (diff)
New annex.largefiles setting, which configures which files `git annex add` and the assistant add to the annex.
I would have sort of liked to put this in .gitattributes, but it seems it does not support multi-word attribute values. Also, making this a single config setting makes it easy to only parse the expression once. A natural next step would be to make the assistant `git add` files that are not annex.largefiles. OTOH, I don't think `git annex add` should `git add` such files, because git-annex command line tools are not in the business of wrapping git command line tools.
Diffstat (limited to 'Annex/FileMatcher.hs')
-rw-r--r--Annex/FileMatcher.hs86
1 files changed, 86 insertions, 0 deletions
diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs
new file mode 100644
index 000000000..c32402baf
--- /dev/null
+++ b/Annex/FileMatcher.hs
@@ -0,0 +1,86 @@
+{- git-annex file matching
+ -
+ - Copyright 2012, 2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.FileMatcher where
+
+import qualified Data.Map as M
+
+import Common.Annex
+import Limit
+import Utility.Matcher
+import Types.Group
+import Logs.Group
+import Annex.UUID
+import qualified Annex
+import Git.FilePath
+
+import Data.Either
+import qualified Data.Set as S
+
+type FileMatcher = Matcher MatchFiles
+
+checkFileMatcher :: FileMatcher -> FilePath -> Annex Bool
+checkFileMatcher matcher file = checkFileMatcher' matcher file S.empty True
+
+checkFileMatcher' :: FileMatcher -> FilePath -> AssumeNotPresent -> Bool -> Annex Bool
+checkFileMatcher' matcher file notpresent def
+ | isEmpty matcher = return def
+ | otherwise = do
+ matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
+ let fi = Annex.FileInfo
+ { Annex.matchFile = matchfile
+ , Annex.relFile = file
+ }
+ matchMrun matcher $ \a -> a notpresent fi
+
+matchAll :: FileMatcher
+matchAll = generate []
+
+parsedToMatcher :: [Either String (Token MatchFiles)] -> Either String FileMatcher
+parsedToMatcher parsed = case partitionEithers parsed of
+ ([], vs) -> Right $ generate vs
+ (es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
+
+parseToken :: MkLimit -> GroupMap -> String -> Either String (Token MatchFiles)
+parseToken checkpresent groupmap t
+ | any (== t) Utility.Matcher.tokens = Right $ Utility.Matcher.token t
+ | t == "present" = use checkpresent
+ | otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
+ M.fromList
+ [ ("include", limitInclude)
+ , ("exclude", limitExclude)
+ , ("copies", limitCopies)
+ , ("inbackend", limitInBackend)
+ , ("largerthan", limitSize (>))
+ , ("smallerthan", limitSize (<))
+ , ("inallgroup", limitInAllGroup groupmap)
+ ]
+ where
+ (k, v) = separate (== '=') t
+ use a = Utility.Matcher.Operation <$> a v
+
+{- This is really dumb tokenization; there's no support for quoted values.
+ - Open and close parens are always treated as standalone tokens;
+ - otherwise tokens must be separated by whitespace. -}
+tokenizeMatcher :: String -> [String]
+tokenizeMatcher = filter (not . null ) . concatMap splitparens . words
+ where
+ splitparens = segmentDelim (`elem` "()")
+
+{- Generates a matcher for files large enough (or meeting other criteria)
+ - to be added to the annex, rather than directly to git. -}
+largeFilesMatcher :: Annex FileMatcher
+largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
+ where
+ go Nothing = return $ matchAll
+ go (Just expr) = do
+ m <- groupMap
+ u <- getUUID
+ either badexpr return $ parsedToMatcher $
+ map (parseToken (limitPresent $ Just u) m)
+ (tokenizeMatcher expr)
+ badexpr e = error $ "bad annex.largefiles configuration: " ++ e