summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Joey Hess <joey@kitenet.net>2014-03-14 15:04:33 -0400
committerGravatar Joey Hess <joey@kitenet.net>2014-03-14 15:04:33 -0400
commitf99d54176d97d099d82f073c6c18ab9f0c33399e (patch)
tree1e544c34dac042d585a4451ae50af07b9834fd90
parentb076926ad5c6dbf9353af7968b89f0553b0f4535 (diff)
"standard" can now be used as a first-class keyword in preferred content expressions.
For example "standard or (include=otherdir/*)" or even "not standard". Note that the implementation avoids any potential for loops (if a standard preferred content expression itself mentioned standard). This commit was sponsored by Jochen Bartl.
-rw-r--r--Annex/FileMatcher.hs17
-rw-r--r--Logs/PreferredContent.hs32
-rw-r--r--debian/changelog2
-rw-r--r--doc/preferred_content.mdwn149
-rw-r--r--doc/preferred_content/standard_groups.mdwn115
5 files changed, 166 insertions, 149 deletions
diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs
index 750795280..524ae3c7e 100644
--- a/Annex/FileMatcher.hs
+++ b/Annex/FileMatcher.hs
@@ -56,23 +56,26 @@ parsedToMatcher parsed = case partitionEithers parsed of
([], vs) -> Right $ generate vs
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
-exprParser :: GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token MatchFiles)]
-exprParser groupmap configmap mu expr =
+exprParser :: FileMatcher -> GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token MatchFiles)]
+exprParser matchstandard groupmap configmap mu expr =
map parse $ tokenizeMatcher expr
where
- parse = parseToken
+ parse = parseToken
+ matchstandard
(limitPresent mu)
(limitInDir preferreddir)
groupmap
preferreddir = fromMaybe "public" $
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
-parseToken :: MkLimit -> MkLimit -> GroupMap -> String -> Either String (Token MatchFiles)
-parseToken checkpresent checkpreferreddir groupmap t
+parseToken :: FileMatcher -> MkLimit -> MkLimit -> GroupMap -> String -> Either String (Token MatchFiles)
+parseToken matchstandard checkpresent checkpreferreddir groupmap t
| t `elem` tokens = Right $ token t
+ | t == "standard" = Right $ Operation $ \notpresent mi ->
+ matchMrun matchstandard $ \a -> a notpresent mi
| t == "present" = use checkpresent
| t == "inpreferreddir" = use checkpreferreddir
- | t == "unused" = Right (Operation limitUnused)
+ | t == "unused" = Right $ Operation limitUnused
| otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
M.fromList
[ ("include", limitInclude)
@@ -109,5 +112,5 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
rc <- readRemoteLog
u <- getUUID
either badexpr return $
- parsedToMatcher $ exprParser gm rc (Just u) expr
+ parsedToMatcher $ exprParser matchAll gm rc (Just u) expr
badexpr e = error $ "bad annex.largefiles configuration: " ++ e
diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs
index 4b25ea094..2bc5f08d6 100644
--- a/Logs/PreferredContent.hs
+++ b/Logs/PreferredContent.hs
@@ -67,29 +67,25 @@ preferredContentMapLoad = do
- versions of git-annex may add new features. Instead, parse errors
- result in a Matcher that will always succeed. -}
makeMatcher :: GroupMap -> M.Map UUID RemoteConfig -> UUID -> PreferredContentExpression -> FileMatcher
-makeMatcher groupmap configmap u expr
- | expr == "standard" = standardMatcher groupmap configmap u
- | null (lefts tokens) = Utility.Matcher.generate $ rights tokens
- | otherwise = matchAll
+makeMatcher groupmap configmap u = go True
where
- tokens = exprParser groupmap configmap (Just u) expr
-
-{- Standard matchers are pre-defined for some groups. If none is defined,
- - or a repository is in multiple groups with standard matchers, match all. -}
-standardMatcher :: GroupMap -> M.Map UUID RemoteConfig -> UUID -> FileMatcher
-standardMatcher groupmap configmap u =
- maybe matchAll (makeMatcher groupmap configmap u . preferredContent) $
- getStandardGroup =<< u `M.lookup` groupsByUUID groupmap
+ go expandstandard expr
+ | null (lefts tokens) = Utility.Matcher.generate $ rights tokens
+ | otherwise = matchAll
+ where
+ tokens = exprParser matchstandard groupmap configmap (Just u) expr
+ matchstandard
+ | expandstandard = maybe matchAll (go False . preferredContent) $
+ getStandardGroup =<< u `M.lookup` groupsByUUID groupmap
+ | otherwise = matchAll
{- Checks if an expression can be parsed, if not returns Just error -}
checkPreferredContentExpression :: PreferredContentExpression -> Maybe String
-checkPreferredContentExpression expr
- | expr == "standard" = Nothing
- | otherwise = case parsedToMatcher tokens of
- Left e -> Just e
- Right _ -> Nothing
+checkPreferredContentExpression expr = case parsedToMatcher tokens of
+ Left e -> Just e
+ Right _ -> Nothing
where
- tokens = exprParser emptyGroupMap M.empty Nothing expr
+ tokens = exprParser matchAll emptyGroupMap M.empty Nothing expr
{- Puts a UUID in a standard group, and sets its preferred content to use
- the standard expression for that group, unless something is already set. -}
diff --git a/debian/changelog b/debian/changelog
index e3cb63d77..60c6f1030 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -17,6 +17,8 @@ git-annex (5.20140307) UNRELEASED; urgency=medium
* Fix ssh connection caching stop method to work with openssh 6.5p1,
which broke the old method.
* Better workaround for problem umasks when eg, setting up ssh keys.
+ * "standard" can now be used as a first-class keyword in preferred content
+ expressions. For example "standard or (include=otherdir/*)"
-- Joey Hess <joeyh@debian.org> Thu, 06 Mar 2014 16:17:01 -0400
diff --git a/doc/preferred_content.mdwn b/doc/preferred_content.mdwn
index 0bc8aa35c..d67cba635 100644
--- a/doc/preferred_content.mdwn
+++ b/doc/preferred_content.mdwn
@@ -18,6 +18,20 @@ If a file matches, it's preferred to have its content stored in the
repository. If it doesn't, it's preferred to drop its content from
the repository (if there are enough copies elsewhere).
+Rather than writing your own preferred content expression, you can use
+several canned ones included in git-annex that are tuned to cover different
+common use cases. You do this by putting a repository in a group,
+and simply setting its preferred content to "standard" to match whatever
+is standard for that group. See [[standard_groups]].
+
+To check at the command line which files are matched by preferred content
+settings, you can use the --want-get and --want-drop options.
+
+For example, "git annex find --want-get --not --in ." will find all the
+files that "git annex get --auto" will want to get, and "git annex find
+--want-drop --in ." will find all the files that "git annex drop --auto"
+will want to drop.
+
The expressions are very similar to the matching options documented
on the [[git-annex]] man page. At the command line, you can use those
options in commands like this:
@@ -86,130 +100,17 @@ The name of the directory can be configured using
(If no directory name is configured, it uses "public" by default.)
-## testing preferred content settings
-
-To check at the command line which files are matched by preferred content
-settings, you can use the --want-get and --want-drop options.
-
-For example, "git annex find --want-get --not --in ." will find all the
-files that "git annex get --auto" will want to get, and "git annex find
---want-drop --in ." will find all the files that "git annex drop --auto"
-will want to drop.
-
-## standard expressions
-
-git-annex comes with some standard preferred content expressions, that can
-be used with repositories that are in some pre-defined groups. To make a
-repository use one of these, just set its preferred content expression
-to "standard", and put it in one of these groups.
-
-(Note that most of these standard expressions also make the repository
-prefer any content that is only currently available on untrusted and
-dead repositories. So if an untrusted repository gets connected,
-any repository that can will back it up.)
-
-### client
-
-All content is preferred, unless it's for a file in a "archive" directory,
-which has reached an archive repository, or is unused.
-
-`(((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) and not unused) or roughlylackingcopies=1`
-
-### transfer
-
-Use for repositories that are used to transfer data between other
-repositories, but do not need to retain data themselves. For
-example, a repository on a server, or in the cloud, or a small
-USB drive used in a sneakernet.
-
-The preferred content expression for these causes them to get and retain
-data until all clients have a copy.
-
-`not (inallgroup=client and copies=client:2) and ($client)`
-
-(Where $client is a copy of the preferred content expression used for
-clients.)
-
-The "copies=client:2" part of the above handles the case where
-there is only one client repository. It makes a transfer repository
-speculatively prefer content in this case, even though it as of yet
-has nowhere to transfer it to. Presumably, another client repository
-will be added later.
-
-### backup
-
-All content is preferred.
-
-`include=* or unused`
-
-### incremental backup
-
-Only prefers content that's not already backed up to another backup
-or incremental backup repository.
-
-`((include=* or unused) and (not copies=backup:1) and (not copies=incrementalbackup:1)) or approxlackingcopies=1`
-
-### small archive
-
-Only prefers content that's located in an "archive" directory, and
-only if it's not already been archived somewhere else.
-
-`((include=*/archive/* or include=archive/*) and not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
-
-### full archive
-
-All content is preferred, unless it's already been archived somewhere else.
-
-`(not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
-
-Note that if you want to archive multiple copies (not a bad idea!),
-you should instead configure all your archive repositories with a
-version of the above preferred content expression with a larger
-number of copies.
-
-### source
-
-Use for repositories where files are often added, but that do not need to
-retain files for local use. For example, a repository on a camera, where
-it's desirable to remove photos as soon as they're transferred elsewhere.
-
-The preferred content expression for these causes them to only retain
-data until a copy has been sent to some other repository.
-
-`not (copies=1)`
-
-### manual
-
-This gives you nearly full manual control over what content is stored in the
-repository. This allows using the [[assistant]] without it trying to keep a
-local copy of every file. Instead, you can manually run `git annex get`,
-`git annex drop`, etc to manage content. Only content that is present
-is preferred.
-
-The exception to this manual control is that content that a client
-repository would not want is not preferred. So, files in archive
-directories are not preferred once their content has
-reached an archive repository.
-
-`present and ($client)`
-
-(Where $client is a copy of the preferred content expression used for
-clients.)
-
-### public
-
-This is used for publishing information to a repository that can be
-publically accessed. Only files in a directory with a particular name
-will be published. (The directory can be located anywhere in the
-repository.)
-
-The name of the directory can be configured using
-`git annex enableremote $remote preferreddir=$dirname`
+### difference: "standard"
-### unwanted
+git-annex comes with some standard preferred content expressions, that
+can be used with repositories that are in some pre-defined groups,
+as listed in [[standard_groups]].
-Use for repositories that you don't want to exist. This will result
-in any content on them being moved away to other repositories. (Works
-best when the unwanted repository is also marked as untrusted or dead.)
+When a repository is in exactly one such group, you can use the "standard"
+keyword in its preferred content expression, to match whatever content
+the group prefers to have. (If a repository is put into multiple standard
+groups, "standard" will match anything, so don't do that!)
-`exclude=*`
+Most often, the whole preferred content expression is simply "standard".
+But, you can do more complicated things, for example:
+"`standard or include=otherdir/*`"
diff --git a/doc/preferred_content/standard_groups.mdwn b/doc/preferred_content/standard_groups.mdwn
new file mode 100644
index 000000000..7f2afd446
--- /dev/null
+++ b/doc/preferred_content/standard_groups.mdwn
@@ -0,0 +1,115 @@
+git-annex comes with some pre-defined [[preferred_content]] settings, that can
+be used with repositories that are in special groups. To make a
+repository use one of these, just set its preferred content expression
+to "standard", and put it in one of these groups.
+
+(Note that most of these standard expressions also make the repository
+prefer any content that is only currently available on untrusted and
+dead repositories. So if an untrusted repository gets connected,
+any repository that can will back it up.)
+
+### client
+
+All content is preferred, unless it's for a file in an "archive" directory,
+which has reached an archive repository, or is unused.
+
+`(((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) and not unused) or roughlylackingcopies=1`
+
+### transfer
+
+Use for repositories that are used to transfer data between other
+repositories, but do not need to retain data themselves. For
+example, a repository on a server, or in the cloud, or a small
+USB drive used in a sneakernet.
+
+The preferred content expression for these causes them to get and retain
+data until all clients have a copy.
+
+`not (inallgroup=client and copies=client:2) and ($client)`
+
+(Where $client is a copy of the preferred content expression used for
+clients.)
+
+The "copies=client:2" part of the above handles the case where
+there is only one client repository. It makes a transfer repository
+speculatively prefer content in this case, even though it does not yet
+have anywhere to transfer it to. Presumably, another client repository
+will be added later.
+
+### backup
+
+All content is preferred.
+
+`include=* or unused`
+
+### incremental backup
+
+Only prefers content that's not already backed up to another backup
+or incremental backup repository.
+
+`((include=* or unused) and (not copies=backup:1) and (not copies=incrementalbackup:1)) or approxlackingcopies=1`
+
+### small archive
+
+Only prefers content that's located in an "archive" directory, and
+only if it's not already been archived somewhere else.
+
+`((include=*/archive/* or include=archive/*) and not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
+
+### full archive
+
+All content is preferred, unless it's already been archived somewhere else.
+
+`(not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
+
+Note that if you want to archive multiple copies (not a bad idea!),
+you should instead configure all your archive repositories with a
+version of the above preferred content expression with a larger
+number of copies.
+
+### source
+
+Use for repositories where files are often added, but that do not need to
+retain files for local use. For example, a repository on a camera, where
+it's desirable to remove photos as soon as they're transferred elsewhere.
+
+The preferred content expression for these causes them to only retain
+data until a copy has been sent to some other repository.
+
+`not (copies=1)`
+
+### manual
+
+This gives you nearly full manual control over what content is stored in the
+repository. This allows using the [[assistant]] without it trying to keep a
+local copy of every file. Instead, you can manually run `git annex get`,
+`git annex drop`, etc to manage content. Only content that is present
+is preferred.
+
+The exception to this manual control is that content that a client
+repository would not want is not preferred. So, files in archive
+directories are not preferred once their content has
+reached an archive repository.
+
+`present and ($client)`
+
+(Where $client is a copy of the preferred content expression used for
+clients.)
+
+### public
+
+This is used for publishing information to a repository that can be
+publicly accessed. Only files in a directory with a particular name
+will be published. (The directory can be located anywhere in the
+repository.)
+
+The name of the directory can be configured using
+`git annex enableremote $remote preferreddir=$dirname`
+
+### unwanted
+
+Use for repositories that you don't want to exist. This will result
+in any content on them being moved away to other repositories. (Works
+best when the unwanted repository is also marked as untrusted or dead.)
+
+`exclude=*`