summaryrefslogtreecommitdiff
path: root/Annex/FileMatcher.hs
blob: e97b80c82e42b5c66a22c83737bafc4f233b6a63 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
{- git-annex file matching
 -
 - Copyright 2012-2016 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

{-# LANGUAGE CPP #-}

module Annex.FileMatcher (
	GetFileMatcher,
	checkFileMatcher,
	checkMatcher,
	matchAll,
	preferredContentParser,
	parsedToMatcher,
	mkLargeFilesParser,
	largeFilesMatcher,
) where

import qualified Data.Map as M

import Annex.Common
import Limit
import Utility.Matcher
import Types.Group
import qualified Annex
import Types.FileMatcher
import Git.FilePath
import Types.Remote (RemoteConfig)
import Annex.CheckAttr
import Git.CheckAttr (unspecifiedAttr)

#ifdef WITH_MAGICMIME
import Magic
#endif

import Data.Either
import qualified Data.Set as S

type GetFileMatcher = FilePath -> Annex (FileMatcher Annex)

checkFileMatcher :: GetFileMatcher -> FilePath -> Annex Bool
checkFileMatcher getmatcher file = do
	matcher <- getmatcher file
	checkMatcher matcher Nothing (Just file) S.empty True

checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> AssumeNotPresent -> Bool -> Annex Bool
checkMatcher matcher mkey afile notpresent d
	| isEmpty matcher = return d
	| otherwise = case (mkey, afile) of
		(_, Just file) -> go =<< fileMatchInfo file
		(Just key, _) -> go (MatchingKey key)
		_ -> return d
  where
	go mi = matchMrun matcher $ \a -> a notpresent mi

fileMatchInfo :: FilePath -> Annex MatchInfo
fileMatchInfo file = do
	matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
	return $ MatchingFile FileInfo
		{ matchFile = matchfile
		, currFile = file
		}

matchAll :: FileMatcher Annex
matchAll = generate []

parsedToMatcher :: [ParseResult] -> Either String (FileMatcher Annex)
parsedToMatcher parsed = case partitionEithers parsed of
	([], vs) -> Right $ generate vs
	(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es

data ParseToken
	= SimpleToken String ParseResult
	| ValueToken String (String -> ParseResult)

type ParseResult = Either String (Token (MatchFiles Annex))

parseToken :: [ParseToken] -> String -> ParseResult
parseToken l t
	| t `elem` tokens = Right $ token t
	| otherwise = go l
  where
	go [] = Left $ "near " ++ show t
	go (SimpleToken s r : _) | s == t = r
	go (ValueToken s mkr : _) | s == k = mkr v
	go (_ : ps) = go ps
	(k, v) = separate (== '=') t

commonTokens :: [ParseToken]
commonTokens =
	[ SimpleToken "unused" (simply limitUnused)
	, SimpleToken "anything" (simply limitAnything)
	, SimpleToken "nothing" (simply limitNothing)
	, ValueToken "include" (usev limitInclude)
	, ValueToken "exclude" (usev limitExclude)
	, ValueToken "largerthan" (usev $ limitSize (>))
	, ValueToken "smallerthan" (usev $ limitSize (<))
	]

{- This is really dumb tokenization; there's no support for quoted values.
 - Open and close parens are always treated as standalone tokens;
 - otherwise tokens must be separated by whitespace. -}
tokenizeMatcher :: String -> [String]
tokenizeMatcher = filter (not . null ) . concatMap splitparens . words
  where
	splitparens = segmentDelim (`elem` "()")

preferredContentParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [ParseResult]
preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu expr =
	map parse $ tokenizeMatcher expr
  where
 	parse = parseToken $
		[ SimpleToken "standard" (call matchstandard)
		, SimpleToken "groupwanted" (call matchgroupwanted)
		, SimpleToken "present" (simply $ limitPresent mu)
		, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
		, ValueToken "copies" (usev limitCopies)
		, ValueToken "lackingcopies" (usev $ limitLackingCopies False)
		, ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
		, ValueToken "inbacked" (usev limitInBackend)
		, ValueToken "metadata" (usev limitMetaData)
		, ValueToken "inallgroup" (usev $ limitInAllGroup getgroupmap)
		] ++ commonTokens
	preferreddir = fromMaybe "public" $
		M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu

mkLargeFilesParser :: Annex (String -> [ParseResult])
mkLargeFilesParser = do
#ifdef WITH_MAGICMIME
	magicmime <- liftIO $ magicOpen [MagicMimeType]
	liftIO $ magicLoadDefault magicmime
#endif
	let parse = parseToken $ commonTokens
#ifdef WITH_MAGICMIME
		++ [ ValueToken "mimetype" (usev $ matchMagic magicmime) ]
#else
		++ [ ValueToken "mimetype" (const $ Left "\"mimetype\" not supported; not built with MagicMime support") ]
#endif
	return $ map parse . tokenizeMatcher

{- Generates a matcher for files large enough (or meeting other criteria)
 - to be added to the annex, rather than directly to git. -}
largeFilesMatcher :: Annex GetFileMatcher
largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
  where
	go (Just expr) = do
		matcher <- mkmatcher expr
		return $ const $ return matcher
	go Nothing = return $ \file -> do
		expr <- checkAttr "annex.largefiles" file
		if null expr || expr == unspecifiedAttr
			then return matchAll
			else mkmatcher expr

	mkmatcher expr = do
		parser <- mkLargeFilesParser
		either badexpr return $ parsedToMatcher $ parser expr
	badexpr e = error $ "bad annex.largefiles configuration: " ++ e

simply :: MatchFiles Annex -> ParseResult
simply = Right . Operation

usev :: MkLimit Annex -> String -> ParseResult
usev a v = Operation <$> a v

call :: FileMatcher Annex -> ParseResult
call sub = Right $ Operation $ \notpresent mi ->
	matchMrun sub $ \a -> a notpresent mi