summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Annex/FileMatcher.hs86
-rw-r--r--Assistant/Threads/Watcher.hs24
-rw-r--r--Command/Add.hs15
-rw-r--r--Limit.hs11
-rw-r--r--Logs/PreferredContent.hs54
-rw-r--r--Types/GitConfig.hs2
-rw-r--r--debian/changelog2
-rw-r--r--doc/git-annex.mdwn38
8 files changed, 169 insertions, 63 deletions
diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs
new file mode 100644
index 000000000..c32402baf
--- /dev/null
+++ b/Annex/FileMatcher.hs
@@ -0,0 +1,86 @@
+{- git-annex file matching
+ -
+ - Copyright 2012, 2013 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.FileMatcher where
+
+import qualified Data.Map as M
+
+import Common.Annex
+import Limit
+import Utility.Matcher
+import Types.Group
+import Logs.Group
+import Annex.UUID
+import qualified Annex
+import Git.FilePath
+
+import Data.Either
+import qualified Data.Set as S
+
+type FileMatcher = Matcher MatchFiles
+
+checkFileMatcher :: FileMatcher -> FilePath -> Annex Bool
+checkFileMatcher matcher file = checkFileMatcher' matcher file S.empty True
+
+checkFileMatcher' :: FileMatcher -> FilePath -> AssumeNotPresent -> Bool -> Annex Bool
+checkFileMatcher' matcher file notpresent def
+ | isEmpty matcher = return def
+ | otherwise = do
+ matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
+ let fi = Annex.FileInfo
+ { Annex.matchFile = matchfile
+ , Annex.relFile = file
+ }
+ matchMrun matcher $ \a -> a notpresent fi
+
+matchAll :: FileMatcher
+matchAll = generate []
+
+parsedToMatcher :: [Either String (Token MatchFiles)] -> Either String FileMatcher
+parsedToMatcher parsed = case partitionEithers parsed of
+ ([], vs) -> Right $ generate vs
+ (es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
+
+parseToken :: MkLimit -> GroupMap -> String -> Either String (Token MatchFiles)
+parseToken checkpresent groupmap t
+ | any (== t) Utility.Matcher.tokens = Right $ Utility.Matcher.token t
+ | t == "present" = use checkpresent
+ | otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
+ M.fromList
+ [ ("include", limitInclude)
+ , ("exclude", limitExclude)
+ , ("copies", limitCopies)
+ , ("inbackend", limitInBackend)
+ , ("largerthan", limitSize (>))
+ , ("smallerthan", limitSize (<))
+ , ("inallgroup", limitInAllGroup groupmap)
+ ]
+ where
+ (k, v) = separate (== '=') t
+ use a = Utility.Matcher.Operation <$> a v
+
+{- This is really dumb tokenization; there's no support for quoted values.
+ - Open and close parens are always treated as standalone tokens;
+ - otherwise tokens must be separated by whitespace. -}
+tokenizeMatcher :: String -> [String]
+tokenizeMatcher = filter (not . null ) . concatMap splitparens . words
+ where
+ splitparens = segmentDelim (`elem` "()")
+
+{- Generates a matcher for files large enough (or meeting other criteria)
+ - to be added to the annex, rather than directly to git. -}
+largeFilesMatcher :: Annex FileMatcher
+largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
+ where
+ go Nothing = return $ matchAll
+ go (Just expr) = do
+ m <- groupMap
+ u <- getUUID
+ either badexpr return $ parsedToMatcher $
+ map (parseToken (limitPresent $ Just u) m)
+ (tokenizeMatcher expr)
+ badexpr e = error $ "bad annex.largefiles configuration: " ++ e
diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs
index 8d06e6659..84193de20 100644
--- a/Assistant/Threads/Watcher.hs
+++ b/Assistant/Threads/Watcher.hs
@@ -35,6 +35,7 @@ import Annex.Direct
import Annex.Content.Direct
import Annex.CatFile
import Annex.Link
+import Annex.FileMatcher
import Git.Types
import Config
import Utility.ThreadScheduler
@@ -77,8 +78,9 @@ watchThread = namedThread "Watcher" $
runWatcher :: Assistant ()
runWatcher = do
startup <- asIO1 startupScan
+ matcher <- liftAnnex $ largeFilesMatcher
direct <- liftAnnex isDirect
- addhook <- hook $ if direct then onAddDirect else onAdd
+ addhook <- hook $ if direct then onAddDirect matcher else onAdd matcher
delhook <- hook onDel
addsymlinkhook <- hook $ onAddSymlink direct
deldirhook <- hook onDelDir
@@ -166,16 +168,22 @@ runHandler handler file filestatus = void $ do
liftAnnex $ Annex.Queue.flushWhenFull
recordChange change
-onAdd :: Handler
-onAdd file filestatus
- | maybe False isRegularFile filestatus = pendingAddChange file
+checkAdd :: FileMatcher -> FilePath -> Assistant (Maybe Change)
+checkAdd matcher file = ifM (liftAnnex $ checkFileMatcher matcher file)
+ ( pendingAddChange file
+ , noChange
+ )
+
+onAdd :: FileMatcher -> Handler
+onAdd matcher file filestatus
+ | maybe False isRegularFile filestatus = checkAdd matcher file
| otherwise = noChange
{- In direct mode, add events are received for both new files, and
- modified existing files. Or, in some cases, existing files that have not
- really been modified. -}
-onAddDirect :: Handler
-onAddDirect file fs = do
+onAddDirect :: FileMatcher -> Handler
+onAddDirect matcher file fs = do
debug ["add direct", file]
v <- liftAnnex $ catKeyFile file
case (v, fs) of
@@ -184,9 +192,9 @@ onAddDirect file fs = do
( noChange
, do
liftAnnex $ changedDirect key file
- pendingAddChange file
+ checkAdd matcher file
)
- _ -> pendingAddChange file
+ _ -> checkAdd matcher file
{- A symlink might be an arbitrary symlink, which is just added.
- Or, if it is a git-annex symlink, ensure it points to the content
diff --git a/Command/Add.hs b/Command/Add.hs
index cf2a55c50..83b1ca22c 100644
--- a/Command/Add.hs
+++ b/Command/Add.hs
@@ -1,6 +1,6 @@
{- git-annex command
-
- - Copyright 2010 Joey Hess <joey@kitenet.net>
+ - Copyright 2010, 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -27,6 +27,7 @@ import Utility.Touch
import Utility.FileMode
import Config
import Utility.InodeCache
+import Annex.FileMatcher
def :: [Command]
def = [notBareRepo $ command "add" paramPaths seek SectionCommon
@@ -37,10 +38,16 @@ def = [notBareRepo $ command "add" paramPaths seek SectionCommon
- In direct mode, it acts on any files that have changed. -}
seek :: [CommandSeek]
seek =
- [ withFilesNotInGit start
- , whenNotDirect $ withFilesUnlocked start
- , whenDirect $ withFilesMaybeModified start
+ [ go withFilesNotInGit
+ , whenNotDirect $ go withFilesUnlocked
+ , whenDirect $ go withFilesMaybeModified
]
+ where
+ go a = withValue largeFilesMatcher $ \matcher ->
+ a $ \file -> ifM (checkFileMatcher matcher file)
+ ( start file
+ , stop
+ )
{- The add subcommand annexes a file, generating a key for it using a
- backend, and then moving it into the annex directory and setting up
diff --git a/Limit.hs b/Limit.hs
index d7f82eb84..98227144d 100644
--- a/Limit.hs
+++ b/Limit.hs
@@ -204,10 +204,15 @@ addSmallerThan = addLimit . limitSize (<)
limitSize :: (Maybe Integer -> Maybe Integer -> Bool) -> MkLimit
limitSize vs s = case readSize dataUnits s of
Nothing -> Left "bad size"
- Just sz -> Right $ const $ lookupFile >=> check sz
+ Just sz -> Right $ go sz
where
- check _ Nothing = return False
- check sz (Just (key, _)) = return $ keySize key `vs` Just sz
+ go sz _ fi = lookupFile fi >>= check fi sz
+ check _ sz (Just (key, _)) = return $ keySize key `vs` Just sz
+ check fi sz Nothing = do
+ filesize <- liftIO $ catchMaybeIO $
+ fromIntegral . fileSize
+ <$> getFileStatus (Annex.relFile fi)
+ return $ filesize `vs` Just sz
addTimeLimit :: String -> Annex ()
addTimeLimit s = do
diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs
index 3340cf5ef..0efe42e17 100644
--- a/Logs/PreferredContent.hs
+++ b/Logs/PreferredContent.hs
@@ -27,8 +27,8 @@ import qualified Annex
import Logs.UUIDBased
import Limit
import qualified Utility.Matcher
+import Annex.FileMatcher
import Annex.UUID
-import Git.FilePath
import Types.Group
import Logs.Group
import Types.StandardGroups
@@ -50,19 +50,11 @@ preferredContentSet NoUUID _ = error "unknown UUID; cannot modify"
- (or the current repository if none is specified). -}
isPreferredContent :: Maybe UUID -> AssumeNotPresent -> FilePath -> Bool -> Annex Bool
isPreferredContent mu notpresent file def = do
- matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
- let fi = Annex.FileInfo
- { Annex.matchFile = matchfile
- , Annex.relFile = file
- }
u <- maybe getUUID return mu
m <- preferredContentMap
case M.lookup u m of
Nothing -> return def
- Just matcher
- | Utility.Matcher.isEmpty matcher -> return def
- | otherwise -> Utility.Matcher.matchMrun matcher $
- \a -> a notpresent fi
+ Just matcher -> checkFileMatcher' matcher file notpresent def
{- The map is cached for speed. -}
preferredContentMap :: Annex Annex.PreferredContentMap
@@ -87,56 +79,30 @@ preferredContentMapRaw = simpleMap . parseLog Just
- because the configuration is shared amoung repositories and newer
- versions of git-annex may add new features. Instead, parse errors
- result in a Matcher that will always succeed. -}
-makeMatcher :: GroupMap -> UUID -> String -> Utility.Matcher.Matcher MatchFiles
+makeMatcher :: GroupMap -> UUID -> String -> FileMatcher
makeMatcher groupmap u s
| s == "standard" = standardMatcher groupmap u
| null (lefts tokens) = Utility.Matcher.generate $ rights tokens
| otherwise = matchAll
where
- tokens = map (parseToken (Just u) groupmap) (tokenizeMatcher s)
+ tokens = map (parseToken (limitPresent $ Just u) groupmap) (tokenizeMatcher s)
{- Standard matchers are pre-defined for some groups. If none is defined,
- or a repository is in multiple groups with standard matchers, match all. -}
-standardMatcher :: GroupMap -> UUID -> Utility.Matcher.Matcher MatchFiles
+standardMatcher :: GroupMap -> UUID -> FileMatcher
standardMatcher m u = maybe matchAll (makeMatcher m u . preferredContent) $
getStandardGroup =<< u `M.lookup` groupsByUUID m
-matchAll :: Utility.Matcher.Matcher MatchFiles
-matchAll = Utility.Matcher.generate []
-
{- Checks if an expression can be parsed, if not returns Just error -}
checkPreferredContentExpression :: String -> Maybe String
checkPreferredContentExpression s
| s == "standard" = Nothing
- | otherwise = case lefts $ map (parseToken Nothing emptyGroupMap) (tokenizeMatcher s) of
- [] -> Nothing
- l -> Just $ unwords $ map ("Parse failure: " ++) l
-
-parseToken :: (Maybe UUID) -> GroupMap -> String -> Either String (Utility.Matcher.Token MatchFiles)
-parseToken mu groupmap t
- | any (== t) Utility.Matcher.tokens = Right $ Utility.Matcher.token t
- | t == "present" = use $ limitPresent mu
- | otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
- M.fromList
- [ ("include", limitInclude)
- , ("exclude", limitExclude)
- , ("copies", limitCopies)
- , ("inbackend", limitInBackend)
- , ("largerthan", limitSize (>))
- , ("smallerthan", limitSize (<))
- , ("inallgroup", limitInAllGroup groupmap)
- ]
- where
- (k, v) = separate (== '=') t
- use a = Utility.Matcher.Operation <$> a v
-
-{- This is really dumb tokenization; there's no support for quoted values.
- - Open and close parens are always treated as standalone tokens;
- - otherwise tokens must be separated by whitespace. -}
-tokenizeMatcher :: String -> [String]
-tokenizeMatcher = filter (not . null ) . concatMap splitparens . words
+ | otherwise = case parsedToMatcher vs of
+ Left e -> Just e
+ Right _ -> Nothing
where
- splitparens = segmentDelim (`elem` "()")
+ vs = map (parseToken (limitPresent Nothing) emptyGroupMap)
+ (tokenizeMatcher s)
{- Puts a UUID in a standard group, and sets its preferred content to use
- the standard expression for that group, unless something is already set. -}
diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs
index b42f8f229..246c320d0 100644
--- a/Types/GitConfig.hs
+++ b/Types/GitConfig.hs
@@ -37,6 +37,7 @@ data GitConfig = GitConfig
, annexAutoCommit :: Bool
, annexWebOptions :: [String]
, annexCrippledFileSystem :: Bool
+ , annexLargeFiles :: Maybe String
, coreSymlinks :: Bool
}
@@ -59,6 +60,7 @@ extractGitConfig r = GitConfig
, annexAutoCommit = getbool (annex "autocommit") True
, annexWebOptions = getwords (annex "web-options")
, annexCrippledFileSystem = getbool (annex "crippledfilesystem") False
+ , annexLargeFiles = getmaybe (annex "largefiles")
, coreSymlinks = getbool "core.symlinks" True
}
where
diff --git a/debian/changelog b/debian/changelog
index e7b79f34b..241fd4bde 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -10,6 +10,8 @@ git-annex (4.20130324) UNRELEASED; urgency=low
* webapp: Run ssh server probes in a way that will work when the
login shell is a monstrosity that should have died 25 years ago,
such as csh.
+ * New annex.largefiles setting, which configures which files
+ `git annex add` and the assistant add to the annex.
-- Joey Hess <joeyh@debian.org> Mon, 25 Mar 2013 10:21:46 -0400
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 35a1b2cdf..502c1e168 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -735,6 +735,23 @@ file contents are present at either of two repositories.
Closes a group of file matching options.
+# PREFERRED CONTENT
+
+Each repository has a preferred content setting, which specifies content
+that the repository wants to have present. These settings can be configured
+using `git annex vicfg`. They are used by the `--auto` option, and
+by the git-annex assistant.
+
+The preferred content settings are similar, but not identical to
+the file matching options specified above, just without the dashes.
+For example:
+
+ exclude=archive/* and (include=*.mp3 or smallerthan=1mb)
+
+The main differences are that `exclude=` and `include=` always
+match relative to the top of the git repository, and that there is
+no equivilant to --in.
+
# CONFIGURATION
Like other git commands, git-annex is configured via `.git/config`.
@@ -765,6 +782,19 @@ Here are all the supported configuration settings.
The default reserve is 1 megabyte.
+* `annex.largefiles`
+
+ Allows configuring which files `git annex add` and the assistant consider
+ to be large enough to need to be added to the annex. By default,
+ all files are added to the annex.
+
+ The value is a preferred content expression. See PREFERRED CONTENT
+ for details.
+
+ Example:
+
+ annex.largefiles = largerthan=100kb or include=*.mp3
+
* `annex.queuesize`
git-annex builds a queue of git commands, in order to combine similar
@@ -790,10 +820,6 @@ Here are all the supported configuration settings.
the accuracy will make `git annex unused` consume more memory;
run `git annex status` for memory usage numbers.
-* `annex.version`
-
- Automatically maintained, and used to automate upgrades between versions.
-
* `annex.sshcaching`
By default, git-annex caches ssh connections
@@ -819,6 +845,10 @@ Here are all the supported configuration settings.
Set to false to prevent the git-annex assistant from automatically
committing changes to files in the repository.
+* `annex.version`
+
+ Automatically maintained, and used to automate upgrades between versions.
+
* `annex.direct`
Set to true when the repository is in direct mode. Should not be set